Commit 6561a3b1 authored by David S. Miller

ipv4: Flush per-ns routing cache more sanely.

Flush the routing cache only of entries that match the
network namespace in which the purge event occurred.
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
parent 782615ae
......@@ -114,7 +114,7 @@ extern int ip_rt_init(void);
extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
__be32 src, struct net_device *dev);
extern void rt_cache_flush(struct net *net, int how);
extern void rt_cache_flush_batch(void);
extern void rt_cache_flush_batch(struct net *net);
extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
......
......@@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
rt_cache_flush(dev_net(dev), 0);
break;
case NETDEV_UNREGISTER_BATCH:
rt_cache_flush_batch();
/* The batch unregister is only called on the first
* device in the list of devices being unregistered.
* Therefore we should not pass dev_net(dev) in here.
*/
rt_cache_flush_batch(NULL);
break;
}
return NOTIFY_DONE;
......
......@@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
* Can be called by a softirq or a process.
* In the later case, we want to be reschedule if necessary
*/
static void rt_do_flush(int process_context)
static void rt_do_flush(struct net *net, int process_context)
{
unsigned int i;
struct rtable *rth, *next;
struct rtable * tail;
for (i = 0; i <= rt_hash_mask; i++) {
struct rtable __rcu **pprev;
struct rtable *list;
if (process_context && need_resched())
cond_resched();
rth = rcu_dereference_raw(rt_hash_table[i].chain);
......@@ -731,50 +733,32 @@ static void rt_do_flush(int process_context)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
struct rtable __rcu **prev;
struct rtable *p;
rth = rcu_dereference_protected(rt_hash_table[i].chain,
list = NULL;
pprev = &rt_hash_table[i].chain;
rth = rcu_dereference_protected(*pprev,
lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */
for (tail = rth; tail;
tail = rcu_dereference_protected(tail->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
rt_hash_table[i].chain = tail;
/* call rt_free on entries after the tail requiring flush */
prev = &rt_hash_table[i].chain;
for (p = rcu_dereference_protected(*prev,
while (rth) {
next = rcu_dereference_protected(rth->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i)));
p != NULL;
p = next) {
next = rcu_dereference_protected(p->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
if (!net ||
net_eq(dev_net(rth->dst.dev), net)) {
rcu_assign_pointer(*pprev, next);
rcu_assign_pointer(rth->dst.rt_next, list);
list = rth;
} else {
*prev = next;
rt_free(p);
pprev = &rth->dst.rt_next;
}
rth = next;
}
}
#else
rth = rcu_dereference_protected(rt_hash_table[i].chain,
lockdep_is_held(rt_hash_lock_addr(i)));
rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) {
next = rcu_dereference_protected(rth->dst.rt_next, 1);
rt_free(rth);
for (; list; list = next) {
next = rcu_dereference_protected(list->dst.rt_next, 1);
rt_free(list);
}
}
}
/*
 * Invalidate @net's routing-cache generation and, unless @delay is
 * negative, synchronously free the cache entries belonging to @net.
 *
 * @delay < 0 means "invalidate only"; entries are then reclaimed
 * lazily as lookups notice the stale generation.
 */
void rt_cache_flush(struct net *net, int delay)
{
	rt_cache_invalidate(net);
	if (delay >= 0)
		rt_do_flush(net, !in_softirq());
}
/* Flush previous cache invalidated entries from the cache.
 *
 * @net may be NULL: the NETDEV_UNREGISTER_BATCH notifier fires only
 * for the first device in a batch, so the caller cannot name a single
 * namespace and asks for a full flush instead.
 */
void rt_cache_flush_batch(struct net *net)
{
	rt_do_flush(net, !in_softirq());
}
static void rt_emergency_hash_rebuild(struct net *net)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment