Commit 63428fb6 authored by David S. Miller

Merge branch 'net_sched-fix-filter-chain-reference-counting'

Cong Wang says:

====================
net_sched: fix filter chain reference counting

This patchset fixes tc filter chain reference counting and nasty race
conditions with RCU callbacks. Please see each patch for details.

v3: Rebase on the latest -net
    Add code comment in patch 1
    Improve comment and changelog for patch 2
    Add patch 3

v2: Add patch 1
    Get rid of more ugly code in patch 2
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents da8ab578 1697c4bb
...@@ -34,7 +34,6 @@ struct tc_action { ...@@ -34,7 +34,6 @@ struct tc_action {
struct gnet_stats_queue tcfa_qstats; struct gnet_stats_queue tcfa_qstats;
struct net_rate_estimator __rcu *tcfa_rate_est; struct net_rate_estimator __rcu *tcfa_rate_est;
spinlock_t tcfa_lock; spinlock_t tcfa_lock;
struct rcu_head tcfa_rcu;
struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_basic_cpu __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats; struct gnet_stats_queue __percpu *cpu_qstats;
struct tc_cookie *act_cookie; struct tc_cookie *act_cookie;
...@@ -50,7 +49,6 @@ struct tc_action { ...@@ -50,7 +49,6 @@ struct tc_action {
#define tcf_qstats common.tcfa_qstats #define tcf_qstats common.tcfa_qstats
#define tcf_rate_est common.tcfa_rate_est #define tcf_rate_est common.tcfa_rate_est
#define tcf_lock common.tcfa_lock #define tcf_lock common.tcfa_lock
#define tcf_rcu common.tcfa_rcu
/* Update lastuse only if needed, to avoid dirtying a cache line. /* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice. * We use a temp variable to avoid fetching jiffies twice.
......
...@@ -53,10 +53,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a, ...@@ -53,10 +53,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
res->goto_tp = rcu_dereference_bh(chain->filter_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain);
} }
static void free_tcf(struct rcu_head *head) /* XXX: For standalone actions, we don't need a RCU grace period either, because
* actions are always connected to filters and filters are already destroyed in
* RCU callbacks, so after a RCU grace period actions are already disconnected
* from filters. Readers later can not find us.
*/
static void free_tcf(struct tc_action *p)
{ {
struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats);
free_percpu(p->cpu_qstats); free_percpu(p->cpu_qstats);
...@@ -76,11 +79,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) ...@@ -76,11 +79,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
spin_unlock_bh(&idrinfo->lock); spin_unlock_bh(&idrinfo->lock);
gen_kill_estimator(&p->tcfa_rate_est); gen_kill_estimator(&p->tcfa_rate_est);
/* free_tcf(p);
* gen_estimator est_timer() might access p->tcfa_lock
* or bstats, wait a RCU grace period before freeing p
*/
call_rcu(&p->tcfa_rcu, free_tcf);
} }
int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
...@@ -259,7 +258,7 @@ void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) ...@@ -259,7 +258,7 @@ void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
{ {
if (est) if (est)
gen_kill_estimator(&a->tcfa_rate_est); gen_kill_estimator(&a->tcfa_rate_est);
call_rcu(&a->tcfa_rcu, free_tcf); free_tcf(a);
} }
EXPORT_SYMBOL(tcf_idr_cleanup); EXPORT_SYMBOL(tcf_idr_cleanup);
......
...@@ -182,7 +182,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, ...@@ -182,7 +182,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
list_add_tail(&chain->list, &block->chain_list); list_add_tail(&chain->list, &block->chain_list);
chain->block = block; chain->block = block;
chain->index = chain_index; chain->index = chain_index;
chain->refcnt = 0; chain->refcnt = 1;
return chain; return chain;
} }
...@@ -194,47 +194,41 @@ static void tcf_chain_flush(struct tcf_chain *chain) ...@@ -194,47 +194,41 @@ static void tcf_chain_flush(struct tcf_chain *chain)
RCU_INIT_POINTER(*chain->p_filter_chain, NULL); RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
RCU_INIT_POINTER(chain->filter_chain, tp->next); RCU_INIT_POINTER(chain->filter_chain, tp->next);
tcf_chain_put(chain);
tcf_proto_destroy(tp); tcf_proto_destroy(tp);
} }
} }
static void tcf_chain_destroy(struct tcf_chain *chain) static void tcf_chain_destroy(struct tcf_chain *chain)
{ {
/* May be already removed from the list by the previous call. */ list_del(&chain->list);
if (!list_empty(&chain->list))
list_del_init(&chain->list);
/* There might still be a reference held when we got here from
* tcf_block_put. Wait for the user to drop reference before free.
*/
if (!chain->refcnt)
kfree(chain); kfree(chain);
} }
static void tcf_chain_hold(struct tcf_chain *chain)
{
++chain->refcnt;
}
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
bool create) bool create)
{ {
struct tcf_chain *chain; struct tcf_chain *chain;
list_for_each_entry(chain, &block->chain_list, list) { list_for_each_entry(chain, &block->chain_list, list) {
if (chain->index == chain_index) if (chain->index == chain_index) {
goto incref; tcf_chain_hold(chain);
return chain;
}
} }
chain = create ? tcf_chain_create(block, chain_index) : NULL;
incref: return create ? tcf_chain_create(block, chain_index) : NULL;
if (chain)
chain->refcnt++;
return chain;
} }
EXPORT_SYMBOL(tcf_chain_get); EXPORT_SYMBOL(tcf_chain_get);
void tcf_chain_put(struct tcf_chain *chain) void tcf_chain_put(struct tcf_chain *chain)
{ {
/* Destroy unused chain, with exception of chain 0, which is the if (--chain->refcnt == 0)
* default one and has to be always present.
*/
if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0)
tcf_chain_destroy(chain); tcf_chain_destroy(chain);
} }
EXPORT_SYMBOL(tcf_chain_put); EXPORT_SYMBOL(tcf_chain_put);
...@@ -279,10 +273,31 @@ void tcf_block_put(struct tcf_block *block) ...@@ -279,10 +273,31 @@ void tcf_block_put(struct tcf_block *block)
if (!block) if (!block)
return; return;
list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { /* XXX: Standalone actions are not allowed to jump to any chain, and
* bound actions should be all removed after flushing. However,
* filters are destroyed in RCU callbacks, we have to hold the chains
* first, otherwise we would always race with RCU callbacks on this list
* without proper locking.
*/
/* Wait for existing RCU callbacks to cool down. */
rcu_barrier();
/* Hold a refcnt for all chains, except 0, in case they are gone. */
list_for_each_entry(chain, &block->chain_list, list)
if (chain->index)
tcf_chain_hold(chain);
/* No race on the list, because no chain could be destroyed. */
list_for_each_entry(chain, &block->chain_list, list)
tcf_chain_flush(chain); tcf_chain_flush(chain);
tcf_chain_destroy(chain);
} /* Wait for RCU callbacks to release the reference count. */
rcu_barrier();
/* At this point, all the chains should have refcnt == 1. */
list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
tcf_chain_put(chain);
kfree(block); kfree(block);
} }
EXPORT_SYMBOL(tcf_block_put); EXPORT_SYMBOL(tcf_block_put);
...@@ -360,6 +375,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, ...@@ -360,6 +375,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
rcu_assign_pointer(*chain->p_filter_chain, tp); rcu_assign_pointer(*chain->p_filter_chain, tp);
RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
rcu_assign_pointer(*chain_info->pprev, tp); rcu_assign_pointer(*chain_info->pprev, tp);
tcf_chain_hold(chain);
} }
static void tcf_chain_tp_remove(struct tcf_chain *chain, static void tcf_chain_tp_remove(struct tcf_chain *chain,
...@@ -371,6 +387,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, ...@@ -371,6 +387,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
if (chain->p_filter_chain && tp == chain->filter_chain) if (chain->p_filter_chain && tp == chain->filter_chain)
RCU_INIT_POINTER(*chain->p_filter_chain, next); RCU_INIT_POINTER(*chain->p_filter_chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next); RCU_INIT_POINTER(*chain_info->pprev, next);
tcf_chain_put(chain);
} }
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment