Commit e93c9378 authored by Jiri Pirko's avatar Jiri Pirko Committed by Jakub Kicinski

devlink: change per-devlink netdev notifier to static one

The commit 565b4824 ("devlink: change port event netdev notifier
from per-net to global") changed original per-net notifier to be
per-devlink instance. That fixed the issue of non-receiving events
of netdev uninit if that moved to a different namespace.
That worked fine in -net tree.

However, later on when commit ee75f1fc ("net/mlx5e: Create
separate devlink instance for ethernet auxiliary device") and
commit 72ed5d56 ("net/mlx5: Suspend auxiliary devices only in
case of PCI device suspend") were merged, a deadlock was introduced
when removing a namespace with devlink instance with another nested
instance.

Here there is the bad flow example resulting in deadlock with mlx5:
net_cleanup_work -> cleanup_net (takes down_read(&pernet_ops_rwsem) ->
devlink_pernet_pre_exit() -> devlink_reload() ->
mlx5_devlink_reload_down() -> mlx5_unload_one_devl_locked() ->
mlx5_detach_device() -> del_adev() -> mlx5e_remove() ->
mlx5e_destroy_devlink() -> devlink_free() ->
unregister_netdevice_notifier() (takes down_write(&pernet_ops_rwsem)

Steps to reproduce:
$ modprobe mlx5_core
$ ip netns add ns1
$ devlink dev reload pci/0000:08:00.0 netns ns1
$ ip netns del ns1

Resolve this by converting the notifier from per-devlink instance to
a static one registered during init phase and leaving it registered
forever. Use this notifier for all devlink port instances created
later on.

Note what a tree needs this fix only in case all of the cited fixes
commits are present.
Reported-by: default avatarMoshe Shemesh <moshe@nvidia.com>
Fixes: 565b4824 ("devlink: change port event netdev notifier from per-net to global")
Fixes: ee75f1fc ("net/mlx5e: Create separate devlink instance for ethernet auxiliary device")
Fixes: 72ed5d56 ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend")
Signed-off-by: default avatarJiri Pirko <jiri@nvidia.com>
Reviewed-by: default avatarSimon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230510144621.932017-1-jiri@resnulli.usSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 7ce93d6f
...@@ -204,11 +204,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, ...@@ -204,11 +204,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
if (ret < 0) if (ret < 0)
goto err_xa_alloc; goto err_xa_alloc;
devlink->netdevice_nb.notifier_call = devlink_port_netdevice_event;
ret = register_netdevice_notifier(&devlink->netdevice_nb);
if (ret)
goto err_register_netdevice_notifier;
devlink->dev = dev; devlink->dev = dev;
devlink->ops = ops; devlink->ops = ops;
xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
...@@ -233,8 +228,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, ...@@ -233,8 +228,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
return devlink; return devlink;
err_register_netdevice_notifier:
xa_erase(&devlinks, devlink->index);
err_xa_alloc: err_xa_alloc:
kfree(devlink); kfree(devlink);
return NULL; return NULL;
...@@ -266,8 +259,6 @@ void devlink_free(struct devlink *devlink) ...@@ -266,8 +259,6 @@ void devlink_free(struct devlink *devlink)
xa_destroy(&devlink->params); xa_destroy(&devlink->params);
xa_destroy(&devlink->ports); xa_destroy(&devlink->ports);
WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
xa_erase(&devlinks, devlink->index); xa_erase(&devlinks, devlink->index);
devlink_put(devlink); devlink_put(devlink);
...@@ -303,6 +294,10 @@ static struct pernet_operations devlink_pernet_ops __net_initdata = { ...@@ -303,6 +294,10 @@ static struct pernet_operations devlink_pernet_ops __net_initdata = {
.pre_exit = devlink_pernet_pre_exit, .pre_exit = devlink_pernet_pre_exit,
}; };
static struct notifier_block devlink_port_netdevice_nb __net_initdata = {
.notifier_call = devlink_port_netdevice_event,
};
static int __init devlink_init(void) static int __init devlink_init(void)
{ {
int err; int err;
...@@ -311,6 +306,9 @@ static int __init devlink_init(void) ...@@ -311,6 +306,9 @@ static int __init devlink_init(void)
if (err) if (err)
goto out; goto out;
err = register_pernet_subsys(&devlink_pernet_ops); err = register_pernet_subsys(&devlink_pernet_ops);
if (err)
goto out;
err = register_netdevice_notifier(&devlink_port_netdevice_nb);
out: out:
WARN_ON(err); WARN_ON(err);
......
...@@ -50,7 +50,6 @@ struct devlink { ...@@ -50,7 +50,6 @@ struct devlink {
u8 reload_failed:1; u8 reload_failed:1;
refcount_t refcount; refcount_t refcount;
struct rcu_work rwork; struct rcu_work rwork;
struct notifier_block netdevice_nb;
char priv[] __aligned(NETDEV_ALIGN); char priv[] __aligned(NETDEV_ALIGN);
}; };
......
...@@ -7073,10 +7073,9 @@ int devlink_port_netdevice_event(struct notifier_block *nb, ...@@ -7073,10 +7073,9 @@ int devlink_port_netdevice_event(struct notifier_block *nb,
struct devlink_port *devlink_port = netdev->devlink_port; struct devlink_port *devlink_port = netdev->devlink_port;
struct devlink *devlink; struct devlink *devlink;
devlink = container_of(nb, struct devlink, netdevice_nb); if (!devlink_port)
if (!devlink_port || devlink_port->devlink != devlink)
return NOTIFY_OK; return NOTIFY_OK;
devlink = devlink_port->devlink;
switch (event) { switch (event) {
case NETDEV_POST_INIT: case NETDEV_POST_INIT:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment