ipv4: Add helpers for neigh lookup for nexthop

A common theme in the output path is looking up a neigh entry for a nexthop, either the gateway in an rtable or a fallback to the daddr in the skb: nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); To allow the nexthop to be an IPv6 address we need to consider the family of the nexthop and then call __ipv{4,6}_neigh_lookup_noref based on it. To make this simpler, add a ip_neigh_gw4 helper similar to ip_neigh_gw6 added in an earlier patch which handles: neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); And then add a second one, ip_neigh_for_gw, that calls either ip_neigh_gw4 or ip_neigh_gw6 based on the address family of the gateway. Update the output paths in the VRF driver and core v4 code to use ip_neigh_for_gw simplifying the family based lookup and making both ready for a v6 nexthop. ipv4_neigh_lookup has a different need - the potential to resolve a passed in address in addition to any gateway in the rtable or skb. Since this is a one-off, add ip_neigh_gw4 and ip_neigh_gw6 diectly. The difference between __neigh_create used by the helpers and neigh_create called by ipv4_neigh_lookup is taking a refcount, so add rcu_read_lock_bh and bump the refcnt on the neigh entry. Signed-off-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>

ipv4: Add helpers for neigh lookup for nexthop
A common theme in the output path is looking up a neigh entry for a nexthop, either the gateway in an rtable or a fallback to the daddr in the skb: nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); To allow the nexthop to be an IPv6 address we need to consider the family of the nexthop and then call __ipv{4,6}_neigh_lookup_noref based on it. To make this simpler, add a ip_neigh_gw4 helper similar to ip_neigh_gw6 added in an earlier patch which handles: neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); And then add a second one, ip_neigh_for_gw, that calls either ip_neigh_gw4 or ip_neigh_gw6 based on the address family of the gateway. Update the output paths in the VRF driver and core v4 code to use ip_neigh_for_gw simplifying the family based lookup and making both ready for a v6 nexthop. ipv4_neigh_lookup has a different need - the potential to resolve a passed in address in addition to any gateway in the rtable or skb. Since this is a one-off, add ip_neigh_gw4 and ip_neigh_gw6 diectly. The difference between __neigh_create used by the helpers and neigh_create called by ipv4_neigh_lookup is taking a refcount, so add rcu_read_lock_bh and bump the refcnt on the neigh entry. Signed-off-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
5c9f7c1d · David Ahern · David S. Miller · 0353f282 · 5c9f7c1d · 5c9f7c1d
Commit 5c9f7c1d authored Apr 05, 2019 by David Ahern Committed by David S. Miller Apr 08, 2019
Showing with 59 additions and 23 deletions

drivers/net/vrf.c drivers/net/vrf.c +4 -6

include/net/route.h include/net/route.h +32 -0

net/ipv4/ip_output.c net/ipv4/ip_output.c +4 -7

net/ipv4/route.c net/ipv4/route.c +19 -10

No files found.
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -549,7 +549,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
-	u32 nexthop;
+	bool is_v6gw = false;
 	int ret = -EINVAL;
 	nf_reset(skb);
@@ -572,13 +572,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 	rcu_read_lock_bh();
-	nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
+	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
-	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-	if (unlikely(!neigh))
-		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
 	if (!IS_ERR(neigh)) {
 		sock_confirm_neigh(skb, neigh);
-		ret = neigh_output(neigh, skb, false);
+		/* if crossing protocols, can not use the cached header */
+		ret = neigh_output(neigh, skb, is_v6gw);
 		rcu_read_unlock_bh();
 		return ret;
 	}

--- a/include/net/route.h
+++ b/include/net/route.h
@@ -29,6 +29,8 @@
 #include <net/flow.h>
 #include <net/inet_sock.h>
 #include <net/ip_fib.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/rcupdate.h>
@@ -350,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
 	return hoplimit;
 }
+static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
+					     __be32 daddr)
+{
+	struct neighbour *neigh;
+	neigh = __ipv4_neigh_lookup_noref(dev, daddr);
+	if (unlikely(!neigh))
+		neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
+	return neigh;
+}
+static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
+						struct sk_buff *skb,
+						bool *is_v6gw)
+{
+	struct net_device *dev = rt->dst.dev;
+	struct neighbour *neigh;
+	if (likely(rt->rt_gw_family == AF_INET)) {
+		neigh = ip_neigh_gw4(dev, rt->rt_gw4);
+	} else if (rt->rt_gw_family == AF_INET6) {
+		neigh = ip_neigh_gw6(dev, &rt->rt_gw6);
+		*is_v6gw = true;
+	} else {
+		neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
+	}
+	return neigh;
+}
 #endif	/* _ROUTE_H */
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -188,7 +188,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
-	u32 nexthop;
+	bool is_v6gw = false;
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@@ -218,16 +218,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	}
 	rcu_read_lock_bh();
-	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
+	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
-	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-	if (unlikely(!neigh))
-		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
 	if (!IS_ERR(neigh)) {
 		int res;
 		sock_confirm_neigh(skb, neigh);
-		res = neigh_output(neigh, skb, false);
+		/* if crossing protocols, can not use the cached header */
+		res = neigh_output(neigh, skb, is_v6gw);
 		rcu_read_unlock_bh();
 		return res;
 	}

--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -436,18 +436,27 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 {
 	const struct rtable *rt = container_of(dst, struct rtable, dst);
 	struct net_device *dev = dst->dev;
-	const __be32 *pkey = daddr;
 	struct neighbour *n;
-	if (rt->rt_gw_family == AF_INET)
+	rcu_read_lock_bh();
-		pkey = (const __be32 *) &rt->rt_gw4;
-	else if (skb)
+	if (likely(rt->rt_gw_family == AF_INET)) {
-		pkey = &ip_hdr(skb)->daddr;
+		n = ip_neigh_gw4(dev, rt->rt_gw4);
+	} else if (rt->rt_gw_family == AF_INET6) {
-	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
+		n = ip_neigh_gw6(dev, &rt->rt_gw6);
-	if (n)
+        } else {
-		return n;
+		__be32 pkey;
-	return neigh_create(&arp_tbl, pkey, dev);
+		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
+		n = ip_neigh_gw4(dev, pkey);
+	}
+	if (n && !refcount_inc_not_zero(&n->refcnt))
+		n = NULL;
+	rcu_read_unlock_bh();
+	return n;
 }
 static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)