Commit a0a86022 authored by David S. Miller's avatar David S. Miller

Merge branch 'devlink-deadlock'

Jiri Pirko says:

====================
devlink: fix a deadlock when taking devlink instance lock while holding RTNL lock

devlink_port_fill() may be called sometimes with RTNL lock held.
When putting the nested port function devlink instance attrs,
current code takes nested devlink instance lock. In that case lock
ordering is wrong.

Patch #1 is a dependency of patch #2.
Patch #2 converts the peernet2id_alloc() call to rely in RCU so it could
         called without devlink instance lock.
Patch #3 takes device reference for devlink instance making sure that
         device does not disappear before devlink_release() is called.
Patch #4 benefits from the preparations done in patches #2 and #3 and
         removes the problematic nested devlink lock aquisition.
Patched #5-#7 improve documentation to reflect this issue so it is
              avoided in the future.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents ee2a35fe 5d77371e
......@@ -18,6 +18,34 @@ netlink commands.
Drivers are encouraged to use the devlink instance lock for their own needs.
Drivers need to be cautious when taking devlink instance lock and
taking RTNL lock at the same time. Devlink instance lock needs to be taken
first, only after that RTNL lock could be taken.
Nested instances
----------------
Some objects, like linecards or port functions, could have another
devlink instances created underneath. In that case, drivers should make
sure to respect following rules:
- Lock ordering should be maintained. If driver needs to take instance
lock of both nested and parent instances at the same time, devlink
instance lock of the parent instance should be taken first, only then
instance lock of the nested instance could be taken.
- Driver should use object-specific helpers to setup the
nested relationship:
- ``devl_nested_devlink_set()`` - called to setup devlink -> nested
devlink relationship (could be user for multiple nested instances.
- ``devl_port_fn_devlink_set()`` - called to setup port function ->
nested devlink relationship.
- ``devlink_linecard_nested_dl_set()`` - called to setup linecard ->
nested devlink relationship.
The nested devlink info is exposed to the userspace over object-specific
attributes of devlink netlink.
Interface documentation
-----------------------
......
......@@ -368,21 +368,30 @@ static inline void put_net_track(struct net *net, netns_tracker *tracker)
typedef struct {
#ifdef CONFIG_NET_NS
struct net *net;
struct net __rcu *net;
#endif
} possible_net_t;
static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
pnet->net = net;
rcu_assign_pointer(pnet->net, net);
#endif
}
static inline struct net *read_pnet(const possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
return pnet->net;
return rcu_dereference_protected(pnet->net, true);
#else
return &init_net;
#endif
}
static inline struct net *read_pnet_rcu(possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
return rcu_dereference(pnet->net);
#else
return &init_net;
#endif
......
......@@ -168,6 +168,20 @@ int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
return 0;
}
/**
* devlink_rel_nested_in_notify - Notify the object this devlink
* instance is nested in.
* @devlink: devlink
*
* This is called upon network namespace change of devlink instance.
* In case this devlink instance is nested in another devlink object,
* a notification of a change of this object should be sent
* over netlink. The parent devlink instance lock needs to be
* taken during the notification preparation.
* However, since the devlink lock of nested instance is held here,
* we would end with wrong devlink instance lock ordering and
* deadlock. Therefore the work is utilized to avoid that.
*/
void devlink_rel_nested_in_notify(struct devlink *devlink)
{
struct devlink_rel *rel = devlink->rel;
......@@ -183,9 +197,8 @@ static struct devlink_rel *devlink_rel_find(unsigned long rel_index)
DEVLINK_REL_IN_USE);
}
static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index)
static struct devlink *devlink_rel_devlink_get(u32 rel_index)
{
struct devlink *devlink;
struct devlink_rel *rel;
u32 devlink_index;
......@@ -198,16 +211,7 @@ static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index)
xa_unlock(&devlink_rels);
if (!rel)
return NULL;
devlink = devlinks_xa_get(devlink_index);
if (!devlink)
return NULL;
devl_lock(devlink);
if (!devl_is_registered(devlink)) {
devl_unlock(devlink);
devlink_put(devlink);
return NULL;
}
return devlink;
return devlinks_xa_get(devlink_index);
}
int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
......@@ -218,11 +222,10 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
struct devlink *rel_devlink;
int err;
rel_devlink = devlink_rel_devlink_get_lock(rel_index);
rel_devlink = devlink_rel_devlink_get(rel_index);
if (!rel_devlink)
return 0;
err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype);
devl_unlock(rel_devlink);
devlink_put(rel_devlink);
if (!err && msg_updated)
*msg_updated = true;
......@@ -310,6 +313,7 @@ static void devlink_release(struct work_struct *work)
mutex_destroy(&devlink->lock);
lockdep_unregister_key(&devlink->lock_key);
put_device(devlink->dev);
kfree(devlink);
}
......@@ -425,7 +429,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
if (ret < 0)
goto err_xa_alloc;
devlink->dev = dev;
devlink->dev = get_device(dev);
devlink->ops = ops;
xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->params, XA_FLAGS_ALLOC);
......
......@@ -86,18 +86,24 @@ int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
struct devlink *devlink, int attrtype)
{
struct nlattr *nested_attr;
struct net *devl_net;
nested_attr = nla_nest_start(msg, attrtype);
if (!nested_attr)
return -EMSGSIZE;
if (devlink_nl_put_handle(msg, devlink))
goto nla_put_failure;
if (!net_eq(net, devlink_net(devlink))) {
int id = peernet2id_alloc(net, devlink_net(devlink),
GFP_KERNEL);
rcu_read_lock();
devl_net = read_pnet_rcu(&devlink->_net);
if (!net_eq(net, devl_net)) {
int id = peernet2id_alloc(net, devl_net, GFP_ATOMIC);
rcu_read_unlock();
if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id))
return -EMSGSIZE;
} else {
rcu_read_unlock();
}
nla_nest_end(msg, nested_attr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment