Commit db3c6139 authored by Daniel Borkmann, committed by David S. Miller

bpf, vxlan, geneve, gre: fix usage of dst_cache on xmit

The assumptions from commit 0c1d70af ("net: use dst_cache for vxlan
device"), 468dfffc ("geneve: add dst caching support") and 3c1cb4d2
("net/ipv4: add dst cache support for gre lwtunnels") on dst_cache usage
when ip_tunnel_info is used are unfortunately not always valid.

While it seems correct for ip_tunnel_info front-ends such as OVS, eBPF
however can fill in ip_tunnel_info for consumers like vxlan, geneve or gre
with different remote dsts, tos, etc, therefore they cannot be assumed as
packet independent.

Right now vxlan, geneve, gre would cache the dst for eBPF and every packet
would reuse the same entry that was first created on the initial route
lookup. eBPF doesn't store/cache the ip_tunnel_info, so each skb may have
a different one.

Fix it by adding a flag that checks the ip_tunnel_info. Also the !tos test
in vxlan needs to be handled differently in this context as it is currently
inferred from ip_tunnel_info as well if present. ip_tunnel_dst_cache_usable()
helper is added for the three tunnel cases, which checks if we can use dst
cache.

Fixes: 0c1d70af ("net: use dst_cache for vxlan device")
Fixes: 468dfffc ("geneve: add dst caching support")
Fixes: 3c1cb4d2 ("net/ipv4: add dst cache support for gre lwtunnels")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 14ca0751
...@@ -775,10 +775,10 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, ...@@ -775,10 +775,10 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct flowi4 *fl4, struct flowi4 *fl4,
struct ip_tunnel_info *info) struct ip_tunnel_info *info)
{ {
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev); struct geneve_dev *geneve = netdev_priv(dev);
struct dst_cache *dst_cache; struct dst_cache *dst_cache;
struct rtable *rt = NULL; struct rtable *rt = NULL;
bool use_cache = true;
__u8 tos; __u8 tos;
memset(fl4, 0, sizeof(*fl4)); memset(fl4, 0, sizeof(*fl4));
...@@ -804,7 +804,6 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, ...@@ -804,7 +804,6 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
dst_cache = &geneve->dst_cache; dst_cache = &geneve->dst_cache;
} }
use_cache = use_cache && !skb->mark;
if (use_cache) { if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
if (rt) if (rt)
...@@ -832,11 +831,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, ...@@ -832,11 +831,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct flowi6 *fl6, struct flowi6 *fl6,
struct ip_tunnel_info *info) struct ip_tunnel_info *info)
{ {
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev); struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6; struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
struct dst_cache *dst_cache; struct dst_cache *dst_cache;
bool use_cache = true;
__u8 prio; __u8 prio;
memset(fl6, 0, sizeof(*fl6)); memset(fl6, 0, sizeof(*fl6));
...@@ -862,7 +861,6 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, ...@@ -862,7 +861,6 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
dst_cache = &geneve->dst_cache; dst_cache = &geneve->dst_cache;
} }
use_cache = use_cache && !skb->mark;
if (use_cache) { if (use_cache) {
dst = dst_cache_get_ip6(dst_cache, &fl6->saddr); dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
if (dst) if (dst)
......
...@@ -1756,17 +1756,15 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, ...@@ -1756,17 +1756,15 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos, struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr, __be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache, struct dst_cache *dst_cache,
struct ip_tunnel_info *info) const struct ip_tunnel_info *info)
{ {
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct rtable *rt = NULL; struct rtable *rt = NULL;
bool use_cache = false;
struct flowi4 fl4; struct flowi4 fl4;
/* when the ip_tunnel_info is availble, the tos used for lookup is if (tos && !info)
* packet independent, so we can use the cache use_cache = false;
*/ if (use_cache) {
if (!skb->mark && (!tos || info)) {
use_cache = true;
rt = dst_cache_get_ip4(dst_cache, saddr); rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt) if (rt)
return rt; return rt;
...@@ -1794,13 +1792,15 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, ...@@ -1794,13 +1792,15 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, struct sk_buff *skb, int oif,
const struct in6_addr *daddr, const struct in6_addr *daddr,
struct in6_addr *saddr, struct in6_addr *saddr,
struct dst_cache *dst_cache) struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{ {
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct dst_entry *ndst; struct dst_entry *ndst;
struct flowi6 fl6; struct flowi6 fl6;
int err; int err;
if (!skb->mark) { if (use_cache) {
ndst = dst_cache_get_ip6(dst_cache, saddr); ndst = dst_cache_get_ip6(dst_cache, saddr);
if (ndst) if (ndst)
return ndst; return ndst;
...@@ -1820,7 +1820,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, ...@@ -1820,7 +1820,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err); return ERR_PTR(err);
*saddr = fl6.saddr; *saddr = fl6.saddr;
if (!skb->mark) if (use_cache)
dst_cache_set_ip6(dst_cache, ndst, saddr); dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst; return ndst;
} }
...@@ -2018,7 +2018,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -2018,7 +2018,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ndst = vxlan6_get_route(vxlan, skb, ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, rdst ? rdst->remote_ifindex : 0,
&dst->sin6.sin6_addr, &saddr, &dst->sin6.sin6_addr, &saddr,
dst_cache); dst_cache, info);
if (IS_ERR(ndst)) { if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n", netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr); &dst->sin6.sin6_addr);
...@@ -2387,7 +2387,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ...@@ -2387,7 +2387,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return -EINVAL; return -EINVAL;
ndst = vxlan6_get_route(vxlan, skb, 0, ndst = vxlan6_get_route(vxlan, skb, 0,
&info->key.u.ipv6.dst, &info->key.u.ipv6.dst,
&info->key.u.ipv6.src, NULL); &info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst)) if (IS_ERR(ndst))
return PTR_ERR(ndst); return PTR_ERR(ndst);
dst_release(ndst); dst_release(ndst);
......
...@@ -140,6 +140,7 @@ struct ip_tunnel { ...@@ -140,6 +140,7 @@ struct ip_tunnel {
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) #define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) #define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
...@@ -206,6 +207,20 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, ...@@ -206,6 +207,20 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
} }
/*
 * Decide whether the dst cache may be consulted/populated for this packet.
 *
 * Returns false when skb->mark is non-zero, or when tunnel info is supplied
 * and has TUNNEL_NOCACHE set in key.tun_flags. Returns true otherwise,
 * including when info is NULL.
 */
static inline bool
ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
			   const struct ip_tunnel_info *info)
{
	/* A marked skb never uses the cache, regardless of tunnel info. */
	if (skb->mark)
		return false;

	/* No tunnel info at all, or tunnel info that does not opt out. */
	return !info || !(info->key.tun_flags & TUNNEL_NOCACHE);
}
static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
*tun_info) *tun_info)
{ {
......
...@@ -1870,7 +1870,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) ...@@ -1870,7 +1870,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info = &md->u.tun_info; info = &md->u.tun_info;
info->mode = IP_TUNNEL_INFO_TX; info->mode = IP_TUNNEL_INFO_TX;
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM; info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
if (flags & BPF_F_DONT_FRAGMENT) if (flags & BPF_F_DONT_FRAGMENT)
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
......
...@@ -527,11 +527,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -527,11 +527,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct ip_tunnel_info *tun_info; struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key; const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
struct flowi4 fl; struct flowi4 fl;
struct rtable *rt;
int min_headroom; int min_headroom;
int tunnel_hlen; int tunnel_hlen;
__be16 df, flags; __be16 df, flags;
bool use_cache;
int err; int err;
tun_info = skb_tunnel_info(skb); tun_info = skb_tunnel_info(skb);
...@@ -540,13 +541,14 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -540,13 +541,14 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb; goto err_free_skb;
key = &tun_info->key; key = &tun_info->key;
rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) : use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
NULL; if (use_cache)
rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
if (!rt) { if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key); rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt)) if (IS_ERR(rt))
goto err_free_skb; goto err_free_skb;
if (!skb->mark) if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr); fl.saddr);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment