Commit 6561a3b1 authored by David S. Miller

ipv4: Flush per-ns routing cache more sanely.

Flush the routing cache only of entries that match the
network namespace in which the purge event occurred.
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
parent 782615ae
......@@ -114,7 +114,7 @@ extern int ip_rt_init(void);
extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
__be32 src, struct net_device *dev);
extern void rt_cache_flush(struct net *net, int how);
extern void rt_cache_flush_batch(void);
extern void rt_cache_flush_batch(struct net *net);
extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
......
......@@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
rt_cache_flush(dev_net(dev), 0);
break;
case NETDEV_UNREGISTER_BATCH:
rt_cache_flush_batch();
/* The batch unregister is only called on the first
* device in the list of devices being unregistered.
* Therefore we should not pass dev_net(dev) in here.
*/
rt_cache_flush_batch(NULL);
break;
}
return NOTIFY_DONE;
......
......@@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
* Can be called by a softirq or a process.
* In the later case, we want to be reschedule if necessary
*/
static void rt_do_flush(int process_context)
static void rt_do_flush(struct net *net, int process_context)
{
unsigned int i;
struct rtable *rth, *next;
struct rtable * tail;
for (i = 0; i <= rt_hash_mask; i++) {
struct rtable __rcu **pprev;
struct rtable *list;
if (process_context && need_resched())
cond_resched();
rth = rcu_dereference_raw(rt_hash_table[i].chain);
......@@ -731,50 +733,32 @@ static void rt_do_flush(int process_context)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
struct rtable __rcu **prev;
struct rtable *p;
rth = rcu_dereference_protected(rt_hash_table[i].chain,
list = NULL;
pprev = &rt_hash_table[i].chain;
rth = rcu_dereference_protected(*pprev,
lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */
for (tail = rth; tail;
tail = rcu_dereference_protected(tail->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
rt_hash_table[i].chain = tail;
/* call rt_free on entries after the tail requiring flush */
prev = &rt_hash_table[i].chain;
for (p = rcu_dereference_protected(*prev,
while (rth) {
next = rcu_dereference_protected(rth->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i)));
p != NULL;
p = next) {
next = rcu_dereference_protected(p->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
if (!net ||
net_eq(dev_net(rth->dst.dev), net)) {
rcu_assign_pointer(*pprev, next);
rcu_assign_pointer(rth->dst.rt_next, list);
list = rth;
} else {
*prev = next;
rt_free(p);
pprev = &rth->dst.rt_next;
}
rth = next;
}
}
#else
rth = rcu_dereference_protected(rt_hash_table[i].chain,
lockdep_is_held(rt_hash_lock_addr(i)));
rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) {
next = rcu_dereference_protected(rth->dst.rt_next, 1);
rt_free(rth);
for (; list; list = next) {
next = rcu_dereference_protected(list->dst.rt_next, 1);
rt_free(list);
}
}
}
/*
 * Invalidate @net's routing-cache generation and, unless @delay is
 * negative, synchronously free the cache entries belonging to @net.
 *
 * @delay < 0 means "invalidate only"; entries are then reclaimed
 * lazily as lookups notice the stale generation.
 */
void rt_cache_flush(struct net *net, int delay)
{
	rt_cache_invalidate(net);
	if (delay >= 0)
		rt_do_flush(net, !in_softirq());
}
/* Flush previous cache invalidated entries from the cache.
 *
 * @net may be NULL: the NETDEV_UNREGISTER_BATCH notifier fires only
 * for the first device in a batch, so the caller cannot name a single
 * namespace and asks for a full flush instead.
 */
void rt_cache_flush_batch(struct net *net)
{
	rt_do_flush(net, !in_softirq());
}
static void rt_emergency_hash_rebuild(struct net *net)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment