Commit d8118b94 authored by David S. Miller's avatar David S. Miller

Merge branch 'udp-tunnel-route-lookups'

Beniamino Galvani says:

====================
net: consolidate IPv4 route lookup for UDP tunnels

At the moment different UDP tunnels rely on different functions for
IPv4 route lookup, and those functions all implement the same
logic. Only bareudp uses the generic ip_route_output_tunnel(), while
geneve and vxlan basically duplicate it slightly differently.

This series first extends the generic lookup function so that it is
suitable for all UDP tunnel implementations. Then, bareudp, geneve and
vxlan are adapted to use them.

This results in code with less duplication and hopefully better
maintainability.

After this series is merged, IPv6 will be converted in a similar way.

Changelog:
v2
 - fix compilation with IPv6 disabled
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3c4fe898 6f19b2c1
......@@ -306,8 +306,10 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (!sock)
return -ESHUTDOWN;
rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, info,
IPPROTO_UDP, use_cache);
rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, &info->key,
0, 0, key->tos,
use_cache ?
(struct dst_cache *)&info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
......@@ -483,8 +485,9 @@ static int bareudp_fill_metadata_dst(struct net_device *dev,
struct rtable *rt;
__be32 saddr;
rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr,
info, IPPROTO_UDP, use_cache);
rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr,
&info->key, 0, 0, info->key.tos,
use_cache ? &info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
......
......@@ -784,60 +784,20 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
return err;
}
static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct net_device *dev,
struct geneve_sock *gs4,
struct flowi4 *fl4,
const struct ip_tunnel_info *info,
__be16 dport, __be16 sport,
__u8 *full_tos)
static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev,
const struct ip_tunnel_info *info,
bool *use_cache)
{
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct dst_cache *dst_cache;
struct rtable *rt = NULL;
__u8 tos;
if (!gs4)
return ERR_PTR(-EIO);
u8 dsfield;
memset(fl4, 0, sizeof(*fl4));
fl4->flowi4_mark = skb->mark;
fl4->flowi4_proto = IPPROTO_UDP;
fl4->daddr = info->key.u.ipv4.dst;
fl4->saddr = info->key.u.ipv4.src;
fl4->fl4_dport = dport;
fl4->fl4_sport = sport;
fl4->flowi4_flags = info->key.flow_flags;
tos = info->key.tos;
if ((tos == 1) && !geneve->cfg.collect_md) {
tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
use_cache = false;
dsfield = info->key.tos;
if (dsfield == 1 && !geneve->cfg.collect_md) {
dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
*use_cache = false;
}
fl4->flowi4_tos = RT_TOS(tos);
if (full_tos)
*full_tos = tos;
dst_cache = (struct dst_cache *)&info->dst_cache;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
if (rt)
return rt;
}
rt = ip_route_output_key(geneve->net, fl4);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
return ERR_PTR(-ENETUNREACH);
}
if (rt->dst.dev == dev) { /* is this necessary? */
netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
return rt;
return dsfield;
}
#if IS_ENABLED(CONFIG_IPV6)
......@@ -865,12 +825,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
fl6->fl6_dport = dport;
fl6->fl6_sport = sport;
prio = info->key.tos;
if ((prio == 1) && !geneve->cfg.collect_md) {
prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
use_cache = false;
}
prio = geneve_get_dsfield(skb, dev, info, &use_cache);
fl6->flowlabel = ip6_make_flowinfo(prio, info->key.label);
dst_cache = (struct dst_cache *)&info->dst_cache;
if (use_cache) {
......@@ -904,19 +859,28 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
const struct ip_tunnel_key *key = &info->key;
struct rtable *rt;
struct flowi4 fl4;
__u8 full_tos;
bool use_cache;
__u8 tos, ttl;
__be16 df = 0;
__be32 saddr;
__be16 sport;
int err;
if (!pskb_inet_may_pull(skb))
return -EINVAL;
if (!gs4)
return -EIO;
use_cache = ip_tunnel_dst_cache_usable(skb, info);
tos = geneve_get_dsfield(skb, dev, info, &use_cache);
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
geneve->cfg.info.key.tp_dst, sport, &full_tos);
rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
&info->key,
sport, geneve->cfg.info.key.tp_dst, tos,
use_cache ?
(struct dst_cache *)&info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
......@@ -939,8 +903,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return -ENOMEM;
}
unclone->key.u.ipv4.dst = fl4.saddr;
unclone->key.u.ipv4.src = fl4.daddr;
unclone->key.u.ipv4.dst = saddr;
unclone->key.u.ipv4.src = info->key.u.ipv4.dst;
}
if (!pskb_may_pull(skb, ETH_HLEN)) {
......@@ -954,13 +918,12 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return -EMSGSIZE;
}
tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb);
if (geneve->cfg.collect_md) {
tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
tos = ip_tunnel_ecn_encap(full_tos, ip_hdr(skb), skb);
if (geneve->cfg.ttl_inherit)
ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
else
......@@ -988,7 +951,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
return err;
udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst,
tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
!net_eq(geneve->net, dev_net(geneve->dev)),
!(info->key.tun_flags & TUNNEL_CSUM));
......@@ -1137,19 +1100,29 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (ip_tunnel_info_af(info) == AF_INET) {
struct rtable *rt;
struct flowi4 fl4;
struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
bool use_cache;
__be32 saddr;
u8 tos;
if (!gs4)
return -EIO;
use_cache = ip_tunnel_dst_cache_usable(skb, info);
tos = geneve_get_dsfield(skb, dev, info, &use_cache);
sport = udp_flow_src_port(geneve->net, skb,
1, USHRT_MAX, true);
rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
geneve->cfg.info.key.tp_dst, sport, NULL);
rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
&info->key,
sport, geneve->cfg.info.key.tp_dst,
tos,
use_cache ? &info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
info->key.u.ipv4.src = fl4.saddr;
info->key.u.ipv4.src = saddr;
#if IS_ENABLED(CONFIG_IPV6)
} else if (ip_tunnel_info_af(info) == AF_INET6) {
struct dst_entry *dst;
......
......@@ -2215,57 +2215,6 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
return 0;
}
static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
struct vxlan_sock *sock4,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr, __be16 dport, __be16 sport,
__u8 flow_flags, struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct rtable *rt = NULL;
struct flowi4 fl4;
if (!sock4)
return ERR_PTR(-EIO);
if (tos && !info)
use_cache = false;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
}
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = oif;
fl4.flowi4_tos = RT_TOS(tos);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = daddr;
fl4.saddr = *saddr;
fl4.fl4_dport = dport;
fl4.fl4_sport = sport;
fl4.flowi4_flags = flow_flags;
rt = ip_route_output_key(vxlan->net, &fl4);
if (!IS_ERR(rt)) {
if (rt->dst.dev == dev) {
netdev_dbg(dev, "circular route to %pI4\n", &daddr);
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
*saddr = fl4.saddr;
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
} else {
netdev_dbg(dev, "no route to %pI4\n", &daddr);
return ERR_PTR(-ENETUNREACH);
}
return rt;
}
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct net_device *dev,
......@@ -2418,30 +2367,38 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
struct ip_tunnel_key *pkey;
struct ip_tunnel_key key;
struct vxlan_dev *vxlan = netdev_priv(dev);
const struct iphdr *old_iph = ip_hdr(skb);
union vxlan_addr *dst;
union vxlan_addr remote_ip, local_ip;
union vxlan_addr remote_ip;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
unsigned int pkt_len = skb->len;
__be16 src_port = 0, dst_port;
struct dst_entry *ndst = NULL;
__u8 tos, ttl, flow_flags = 0;
__u8 tos, ttl;
int ifindex;
int err;
u32 flags = vxlan->cfg.flags;
bool use_cache;
bool udp_sum = false;
bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
__be32 vni = 0;
#if IS_ENABLED(CONFIG_IPV6)
union vxlan_addr local_ip;
__be32 label;
#endif
info = skb_tunnel_info(skb);
use_cache = ip_tunnel_dst_cache_usable(skb, info);
if (rdst) {
dst = &rdst->remote_ip;
memset(&key, 0, sizeof(key));
pkey = &key;
if (vxlan_addr_any(dst)) {
if (did_rsc) {
/* short-circuited back to local bridge */
......@@ -2455,7 +2412,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = (rdst->remote_vni) ? : default_vni;
ifindex = rdst->remote_ifindex;
local_ip = vxlan->cfg.saddr;
if (dst->sa.sa_family == AF_INET) {
key.u.ipv4.src = vxlan->cfg.saddr.sin.sin_addr.s_addr;
key.u.ipv4.dst = rdst->remote_ip.sin.sin_addr.s_addr;
} else {
key.u.ipv6.src = vxlan->cfg.saddr.sin6.sin6_addr;
key.u.ipv6.dst = rdst->remote_ip.sin6.sin6_addr;
}
dst_cache = &rdst->dst_cache;
md->gbp = skb->mark;
if (flags & VXLAN_F_TTL_INHERIT) {
......@@ -2469,12 +2434,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = vxlan->cfg.tos;
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
if (tos && !info)
use_cache = false;
if (dst->sa.sa_family == AF_INET)
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
else
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
#if IS_ENABLED(CONFIG_IPV6)
local_ip = vxlan->cfg.saddr;
label = vxlan->cfg.label;
#endif
} else {
......@@ -2486,14 +2454,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
if (remote_ip.sa.sa_family == AF_INET) {
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
} else {
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
#if IS_ENABLED(CONFIG_IPV6)
local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
#endif
}
dst = &remote_ip;
pkey = &info->key;
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
flow_flags = info->key.flow_flags;
vni = tunnel_id_to_key32(info->key.tun_id);
ifindex = 0;
dst_cache = &info->dst_cache;
......@@ -2517,15 +2486,14 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
struct rtable *rt;
__be16 df = 0;
__be32 saddr;
if (!ifindex)
ifindex = sock4->sock->sk->sk_bound_dev_if;
rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
dst->sin.sin_addr.s_addr,
&local_ip.sin.sin_addr.s_addr,
dst_port, src_port, flow_flags,
dst_cache, info);
rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, ifindex,
&saddr, pkey, src_port, dst_port,
tos, use_cache ? dst_cache : NULL);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto tx_error;
......@@ -2561,16 +2529,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} else if (err) {
if (info) {
struct ip_tunnel_info *unclone;
struct in_addr src, dst;
unclone = skb_tunnel_info_unclone(skb);
if (unlikely(!unclone))
goto tx_error;
src = remote_ip.sin.sin_addr;
dst = local_ip.sin.sin_addr;
unclone->key.u.ipv4.src = src.s_addr;
unclone->key.u.ipv4.dst = dst.s_addr;
unclone->key.u.ipv4.src = pkey->u.ipv4.dst;
unclone->key.u.ipv4.dst = saddr;
}
vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
dst_release(ndst);
......@@ -2584,8 +2549,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (err < 0)
goto tx_error;
udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
pkey->u.ipv4.dst, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
......@@ -3286,11 +3251,14 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
struct rtable *rt;
rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
info->key.u.ipv4.dst,
&info->key.u.ipv4.src, dport, sport,
info->key.flow_flags, &info->dst_cache,
info);
if (!sock4)
return -EIO;
rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, 0,
&info->key.u.ipv4.src,
&info->key,
sport, dport, info->key.tos,
&info->dst_cache);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
......
......@@ -136,12 +136,6 @@ static inline struct rtable *__ip_route_output_key(struct net *net,
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
const struct sock *sk);
struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
struct net_device *dev,
struct net *net, __be32 *saddr,
const struct ip_tunnel_info *info,
u8 protocol, bool use_cache);
struct dst_entry *ipv4_blackhole_route(struct net *net,
struct dst_entry *dst_orig);
......
......@@ -162,6 +162,14 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
void udp_tunnel_sock_release(struct socket *sock);
struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
struct net_device *dev,
struct net *net, int oif,
__be32 *saddr,
const struct ip_tunnel_key *key,
__be16 sport, __be16 dport, u8 tos,
struct dst_cache *dst_cache);
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
__be16 flags, __be64 tunnel_id,
int md_size);
......
......@@ -2885,54 +2885,6 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
struct net_device *dev,
struct net *net, __be32 *saddr,
const struct ip_tunnel_info *info,
u8 protocol, bool use_cache)
{
#ifdef CONFIG_DST_CACHE
struct dst_cache *dst_cache;
#endif
struct rtable *rt = NULL;
struct flowi4 fl4;
__u8 tos;
#ifdef CONFIG_DST_CACHE
dst_cache = (struct dst_cache *)&info->dst_cache;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
}
#endif
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = protocol;
fl4.daddr = info->key.u.ipv4.dst;
fl4.saddr = info->key.u.ipv4.src;
tos = info->key.tos;
fl4.flowi4_tos = RT_TOS(tos);
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
return ERR_PTR(-ENETUNREACH);
}
if (rt->dst.dev == dev) { /* is this necessary? */
netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
#ifdef CONFIG_DST_CACHE
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
#endif
*saddr = fl4.saddr;
return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_tunnel);
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
struct rtable *rt, u32 table_id, struct flowi4 *fl4,
......
......@@ -204,4 +204,53 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
}
EXPORT_SYMBOL_GPL(udp_tun_rx_dst);
struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
struct net_device *dev,
struct net *net, int oif,
__be32 *saddr,
const struct ip_tunnel_key *key,
__be16 sport, __be16 dport, u8 tos,
struct dst_cache *dst_cache)
{
struct rtable *rt = NULL;
struct flowi4 fl4;
#ifdef CONFIG_DST_CACHE
if (dst_cache) {
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
}
#endif
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.flowi4_oif = oif;
fl4.daddr = key->u.ipv4.dst;
fl4.saddr = key->u.ipv4.src;
fl4.fl4_dport = dport;
fl4.fl4_sport = sport;
fl4.flowi4_tos = RT_TOS(tos);
fl4.flowi4_flags = key->flow_flags;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
return ERR_PTR(-ENETUNREACH);
}
if (rt->dst.dev == dev) { /* is this necessary? */
netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
#ifdef CONFIG_DST_CACHE
if (dst_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
#endif
*saddr = fl4.saddr;
return rt;
}
EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup);
MODULE_LICENSE("GPL");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment