Commit a2f1483b authored by David S. Miller

Merge branch 'vxlan-fix-default-fdb-entry-user-space-notify-ordering-race'

Roopa Prabhu says:

====================
vxlan: fix default fdb entry user-space notify ordering/race

Problem:
In vxlan_newlink, a default fdb entry is added before register_netdev.
The function that creates the default fdb entry also notifies user-space
of it, so user-space is told about an fdb entry on a vxlan device that it
does not know about yet (RTM_NEWNEIGH is sent before RTM_NEWLINK for the
same ifindex).

This series fixes the user-space netlink notification ordering issue
with the following changes:
- Decouple fdb notify from fdb create.
- Move fdb notify after register_netdev.
- Modify rtnl_configure_link to allow configuring a link early.
- Call rtnl_configure_link in the vxlan newlink handler so that user-space
  is notified of the new link before the fdb notify, thereby avoiding the
  user-space race.
====================

Fixes: afbd8bae ("vxlan: add implicit fdb entry for default destination")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
parents 22dd1491 0241b836
...@@ -637,8 +637,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) ...@@ -637,8 +637,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
} }
/* Add new entry to forwarding table -- assumes lock held */ static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
const u8 *mac, __u16 state,
__be32 src_vni, __u8 ndm_flags)
{
struct vxlan_fdb *f;
f = kmalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
return NULL;
f->state = state;
f->flags = ndm_flags;
f->updated = f->used = jiffies;
f->vni = src_vni;
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
return f;
}
static int vxlan_fdb_create(struct vxlan_dev *vxlan, static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __be16 port, __be32 src_vni,
__be32 vni, __u32 ifindex, __u8 ndm_flags,
struct vxlan_fdb **fdb)
{
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
int rc;
if (vxlan->cfg.addrmax &&
vxlan->addrcnt >= vxlan->cfg.addrmax)
return -ENOSPC;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
if (!f)
return -ENOMEM;
rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
if (rc < 0) {
kfree(f);
return rc;
}
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
vxlan_fdb_head(vxlan, mac, src_vni));
*fdb = f;
return 0;
}
/* Add new entry to forwarding table -- assumes lock held */
static int vxlan_fdb_update(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip, const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags, __u16 state, __u16 flags,
__be16 port, __be32 src_vni, __be32 vni, __be16 port, __be32 src_vni, __be32 vni,
...@@ -688,37 +741,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, ...@@ -688,37 +741,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
if (!(flags & NLM_F_CREATE)) if (!(flags & NLM_F_CREATE))
return -ENOENT; return -ENOENT;
if (vxlan->cfg.addrmax &&
vxlan->addrcnt >= vxlan->cfg.addrmax)
return -ENOSPC;
/* Disallow replace to add a multicast entry */ /* Disallow replace to add a multicast entry */
if ((flags & NLM_F_REPLACE) && if ((flags & NLM_F_REPLACE) &&
(is_multicast_ether_addr(mac) || is_zero_ether_addr(mac))) (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
return -EOPNOTSUPP; return -EOPNOTSUPP;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
f = kmalloc(sizeof(*f), GFP_ATOMIC); rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
if (!f) vni, ifindex, ndm_flags, &f);
return -ENOMEM; if (rc < 0)
notify = 1;
f->state = state;
f->flags = ndm_flags;
f->updated = f->used = jiffies;
f->vni = src_vni;
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
if (rc < 0) {
kfree(f);
return rc; return rc;
} notify = 1;
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
vxlan_fdb_head(vxlan, mac, src_vni));
} }
if (notify) { if (notify) {
...@@ -742,13 +775,15 @@ static void vxlan_fdb_free(struct rcu_head *head) ...@@ -742,13 +775,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
kfree(f); kfree(f);
} }
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
bool do_notify)
{ {
netdev_dbg(vxlan->dev, netdev_dbg(vxlan->dev,
"delete %pM\n", f->eth_addr); "delete %pM\n", f->eth_addr);
--vxlan->addrcnt; --vxlan->addrcnt;
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH); if (do_notify)
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
hlist_del_rcu(&f->hlist); hlist_del_rcu(&f->hlist);
call_rcu(&f->rcu, vxlan_fdb_free); call_rcu(&f->rcu, vxlan_fdb_free);
...@@ -864,7 +899,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -864,7 +899,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return -EAFNOSUPPORT; return -EAFNOSUPPORT;
spin_lock_bh(&vxlan->hash_lock); spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
port, src_vni, vni, ifindex, ndm->ndm_flags); port, src_vni, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
...@@ -898,7 +933,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, ...@@ -898,7 +933,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
goto out; goto out;
} }
vxlan_fdb_destroy(vxlan, f); vxlan_fdb_destroy(vxlan, f, true);
out: out:
return 0; return 0;
...@@ -1007,7 +1042,7 @@ static bool vxlan_snoop(struct net_device *dev, ...@@ -1007,7 +1042,7 @@ static bool vxlan_snoop(struct net_device *dev,
/* close off race between vxlan_flush and incoming packets */ /* close off race between vxlan_flush and incoming packets */
if (netif_running(dev)) if (netif_running(dev))
vxlan_fdb_create(vxlan, src_mac, src_ip, vxlan_fdb_update(vxlan, src_mac, src_ip,
NUD_REACHABLE, NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE, NLM_F_EXCL|NLM_F_CREATE,
vxlan->cfg.dst_port, vxlan->cfg.dst_port,
...@@ -2366,7 +2401,7 @@ static void vxlan_cleanup(struct timer_list *t) ...@@ -2366,7 +2401,7 @@ static void vxlan_cleanup(struct timer_list *t)
"garbage collect %pM\n", "garbage collect %pM\n",
f->eth_addr); f->eth_addr);
f->state = NUD_STALE; f->state = NUD_STALE;
vxlan_fdb_destroy(vxlan, f); vxlan_fdb_destroy(vxlan, f, true);
} else if (time_before(timeout, next_timer)) } else if (time_before(timeout, next_timer))
next_timer = timeout; next_timer = timeout;
} }
...@@ -2417,7 +2452,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) ...@@ -2417,7 +2452,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
spin_lock_bh(&vxlan->hash_lock); spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni); f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f) if (f)
vxlan_fdb_destroy(vxlan, f); vxlan_fdb_destroy(vxlan, f, true);
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
} }
...@@ -2471,7 +2506,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) ...@@ -2471,7 +2506,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
continue; continue;
/* the all_zeros_mac entry is deleted at vxlan_uninit */ /* the all_zeros_mac entry is deleted at vxlan_uninit */
if (!is_zero_ether_addr(f->eth_addr)) if (!is_zero_ether_addr(f->eth_addr))
vxlan_fdb_destroy(vxlan, f); vxlan_fdb_destroy(vxlan, f, true);
} }
} }
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
...@@ -3162,6 +3197,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, ...@@ -3162,6 +3197,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
{ {
struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f = NULL;
int err; int err;
err = vxlan_dev_configure(net, dev, conf, false, extack); err = vxlan_dev_configure(net, dev, conf, false, extack);
...@@ -3175,24 +3211,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, ...@@ -3175,24 +3211,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
err = vxlan_fdb_create(vxlan, all_zeros_mac, err = vxlan_fdb_create(vxlan, all_zeros_mac,
&vxlan->default_dst.remote_ip, &vxlan->default_dst.remote_ip,
NUD_REACHABLE | NUD_PERMANENT, NUD_REACHABLE | NUD_PERMANENT,
NLM_F_EXCL | NLM_F_CREATE,
vxlan->cfg.dst_port, vxlan->cfg.dst_port,
vxlan->default_dst.remote_vni, vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_vni, vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_ifindex, vxlan->default_dst.remote_ifindex,
NTF_SELF); NTF_SELF, &f);
if (err) if (err)
return err; return err;
} }
err = register_netdevice(dev); err = register_netdevice(dev);
if (err)
goto errout;
err = rtnl_configure_link(dev, NULL);
if (err) { if (err) {
vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); unregister_netdevice(dev);
return err; goto errout;
} }
/* notify default fdb entry */
if (f)
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
list_add(&vxlan->next, &vn->vxlan_list); list_add(&vxlan->next, &vn->vxlan_list);
return 0; return 0;
errout:
if (f)
vxlan_fdb_destroy(vxlan, f, false);
return err;
} }
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
...@@ -3427,6 +3474,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], ...@@ -3427,6 +3474,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
struct vxlan_rdst *dst = &vxlan->default_dst; struct vxlan_rdst *dst = &vxlan->default_dst;
struct vxlan_rdst old_dst; struct vxlan_rdst old_dst;
struct vxlan_config conf; struct vxlan_config conf;
struct vxlan_fdb *f = NULL;
int err; int err;
err = vxlan_nl2conf(tb, data, err = vxlan_nl2conf(tb, data,
...@@ -3455,16 +3503,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], ...@@ -3455,16 +3503,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
err = vxlan_fdb_create(vxlan, all_zeros_mac, err = vxlan_fdb_create(vxlan, all_zeros_mac,
&dst->remote_ip, &dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT, NUD_REACHABLE | NUD_PERMANENT,
NLM_F_CREATE | NLM_F_APPEND,
vxlan->cfg.dst_port, vxlan->cfg.dst_port,
dst->remote_vni, dst->remote_vni,
dst->remote_vni, dst->remote_vni,
dst->remote_ifindex, dst->remote_ifindex,
NTF_SELF); NTF_SELF, &f);
if (err) { if (err) {
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
return err; return err;
} }
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
} }
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
} }
......
...@@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) ...@@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
return err; return err;
} }
dev->rtnl_link_state = RTNL_LINK_INITIALIZED; if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
__dev_notify_flags(dev, old_flags, 0U);
__dev_notify_flags(dev, old_flags, ~0U); } else {
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
__dev_notify_flags(dev, old_flags, ~0U);
}
return 0; return 0;
} }
EXPORT_SYMBOL(rtnl_configure_link); EXPORT_SYMBOL(rtnl_configure_link);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment