Commit a94b9367 authored by Wei Wang, committed by David S. Miller

ipv6: grab rt->rt6i_ref before allocating pcpu rt

After the rwlock is replaced with rcu and a spinlock, ip6_pol_route() will
be called with only rcu held. That means rt6 route deletion could happen
simultaneously with rt6_make_pcpu_route(). This could potentially cause a
memory leak if rt6_release() is called right before rt6_make_pcpu_route()
on the same route.

This patch grabs rt->rt6i_ref safely before calling rt6_make_pcpu_route()
to make sure rt6_release() will not get triggered while
rt6_make_pcpu_route() is in progress. rt6_release() is then called after
rt6_make_pcpu_route() has finished.

Note: As we are incrementing rt->rt6i_ref in ip6_pol_route(), there is a
very slim chance that fib6_purge_rt() will be triggered unnecessarily
when deleting a route, if ip6_pol_route() running on another thread picks
this route as well and tries to make a pcpu cache for it.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 2b760fcf
...@@ -1070,7 +1070,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) ...@@ -1070,7 +1070,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{ {
struct fib6_table *table = rt->rt6i_table;
struct rt6_info *pcpu_rt, *prev, **p; struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt); pcpu_rt = ip6_rt_pcpu_alloc(rt);
...@@ -1081,28 +1080,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) ...@@ -1081,28 +1080,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return net->ipv6.ip6_null_entry; return net->ipv6.ip6_null_entry;
} }
read_lock_bh(&table->tb6_lock); dst_hold(&pcpu_rt->dst);
if (rt->rt6i_pcpu) {
p = this_cpu_ptr(rt->rt6i_pcpu); p = this_cpu_ptr(rt->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt); prev = cmpxchg(p, NULL, pcpu_rt);
if (prev) { if (prev) {
/* If someone did it before us, return prev instead */ /* If someone did it before us, return prev instead */
/* release refcnt taken by ip6_rt_pcpu_alloc() */
dst_release_immediate(&pcpu_rt->dst); dst_release_immediate(&pcpu_rt->dst);
pcpu_rt = prev; /* release refcnt taken by above dst_hold() */
}
} else {
/* rt has been removed from the fib6 tree
* before we have a chance to acquire the read_lock.
* In this case, don't brother to create a pcpu rt
* since rt is going away anyway. The next
* dst_check() will trigger a re-lookup.
*/
dst_release_immediate(&pcpu_rt->dst); dst_release_immediate(&pcpu_rt->dst);
pcpu_rt = rt; dst_hold(&prev->dst);
pcpu_rt = prev;
} }
dst_hold(&pcpu_rt->dst);
rt6_dst_from_metrics_check(pcpu_rt); rt6_dst_from_metrics_check(pcpu_rt);
read_unlock_bh(&table->tb6_lock);
return pcpu_rt; return pcpu_rt;
} }
...@@ -1683,19 +1674,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, ...@@ -1683,19 +1674,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (pcpu_rt) { if (pcpu_rt) {
read_unlock_bh(&table->tb6_lock); read_unlock_bh(&table->tb6_lock);
} else { } else {
/* atomic_inc_not_zero() is needed when using rcu */
if (atomic_inc_not_zero(&rt->rt6i_ref)) {
/* We have to do the read_unlock first /* We have to do the read_unlock first
* because rt6_make_pcpu_route() may trigger * because rt6_make_pcpu_route() may trigger
* ip6_dst_gc() which will take the write_lock. * ip6_dst_gc() which will take the write_lock.
*
* No dst_hold() on rt is needed because grabbing
* rt->rt6i_ref makes sure rt can't be released.
*/ */
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock); read_unlock_bh(&table->tb6_lock);
pcpu_rt = rt6_make_pcpu_route(rt); pcpu_rt = rt6_make_pcpu_route(rt);
dst_release(&rt->dst); rt6_release(rt);
} else {
/* rt is already removed from tree */
read_unlock_bh(&table->tb6_lock);
pcpu_rt = net->ipv6.ip6_null_entry;
dst_hold(&pcpu_rt->dst);
}
} }
trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
return pcpu_rt; return pcpu_rt;
} }
} }
EXPORT_SYMBOL_GPL(ip6_pol_route); EXPORT_SYMBOL_GPL(ip6_pol_route);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment