Commit 9f6cff99 authored by David S. Miller

Merge branch 'Simplify-IPv6-route-offload-API'

Ido Schimmel says:

====================
Simplify IPv6 route offload API

Motivation
==========

This is the IPv6 counterpart of "Simplify IPv4 route offload API" [1].
The aim of this patch set is to simplify the IPv6 route offload API by
making the stack a bit smarter about the notifications it is generating.
This allows driver authors to focus on programming the underlying device
instead of having to duplicate the IPv6 route insertion logic in their
driver, which is error-prone.

Details
=======

Today, whenever an IPv6 route is added or deleted, a notification is sent
in the FIB notification chain and it is up to offload drivers to decide
if the route should be programmed to the hardware or not. This is not an
easy task because, in hardware, routes are keyed by {prefix, prefix length,
table id}, whereas the kernel can store multiple such routes that only
differ in metric / nexthop info.

This series makes sure that only routes that are actually used in the
data path are notified to offload drivers. This greatly simplifies the
work these drivers need to do, as they are now only concerned with
programming the hardware and do not need to replicate the IPv6 route
insertion logic and store multiple identical routes.

The route that is notified is the first route in the IPv6 FIB node,
which represents a single prefix and length in a given table. In case
the route is deleted and there is another route with the same key, a
replace notification is emitted. Otherwise, a delete notification is
emitted.

Unlike IPv4, in IPv6 it is possible to append individual nexthops to an
existing multipath route. Therefore, in addition to the replace and
delete notifications present in IPv4, an append notification is also
used.

Testing
=======

To ensure there is no degradation in route insertion rates, I averaged
the insertion rate of 512k routes (/64 and /128) over 50 runs. Did not
observe any degradation.

Functional tests are available here [2]. They rely on route trap
indication, which is added in a subsequent patch set.

In addition, I have been running syzkaller for the past couple of weeks
with debug options enabled. Did not observe any problems.

Patch set overview
==================

Patches #1-#7 gradually introduce the new FIB notifications
Patch #8 converts mlxsw to use the new notifications
Patch #9 removes the old notifications

[1] https://patchwork.ozlabs.org/cover/1209738/
[2] https://github.com/idosch/linux/tree/fib-notifier
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents af779778 caafb250
...@@ -4989,13 +4989,6 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) ...@@ -4989,13 +4989,6 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
kfree(mlxsw_sp_rt6); kfree(mlxsw_sp_rt6);
} }
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
return !(rt->fib6_flags & RTF_ADDRCONF) &&
rt->fib6_nh->fib_nh_gw_family;
}
static struct fib6_info * static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{ {
...@@ -5003,37 +4996,6 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) ...@@ -5003,37 +4996,6 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
list)->rt; list)->rt;
} }
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
const struct fib6_info *nrt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
return NULL;
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
* virtual router.
*/
if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
continue;
if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
break;
if (rt->fib6_metric < nrt->fib6_metric)
continue;
if (rt->fib6_metric == nrt->fib6_metric &&
mlxsw_sp_fib6_rt_can_mp(rt))
return fib6_entry;
if (rt->fib6_metric > nrt->fib6_metric)
break;
}
return NULL;
}
static struct mlxsw_sp_rt6 * static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
const struct fib6_info *rt) const struct fib6_info *rt)
...@@ -5424,86 +5386,13 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, ...@@ -5424,86 +5386,13 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
kfree(fib6_entry); kfree(fib6_entry);
} }
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
const struct fib6_info *nrt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
continue;
if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
break;
if (replace && rt->fib6_metric == nrt->fib6_metric) {
if (mlxsw_sp_fib6_rt_can_mp(rt) ==
mlxsw_sp_fib6_rt_can_mp(nrt))
return fib6_entry;
if (mlxsw_sp_fib6_rt_can_mp(nrt))
fallback = fallback ?: fib6_entry;
}
if (rt->fib6_metric > nrt->fib6_metric)
return fallback ?: fib6_entry;
}
return fallback;
}
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
bool *p_replace)
{
struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
struct mlxsw_sp_fib6_entry *fib6_entry;
fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);
if (*p_replace && !fib6_entry)
*p_replace = false;
if (fib6_entry) {
list_add_tail(&new6_entry->common.list,
&fib6_entry->common.list);
} else {
struct mlxsw_sp_fib6_entry *last;
list_for_each_entry(last, &fib_node->entry_list, common.list) {
struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
break;
fib6_entry = last;
}
if (fib6_entry)
list_add(&new6_entry->common.list,
&fib6_entry->common.list);
else
list_add(&new6_entry->common.list,
&fib_node->entry_list);
}
return 0;
}
static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
list_del(&fib6_entry->common.list);
}
static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry, struct mlxsw_sp_fib6_entry *fib6_entry)
bool *p_replace)
{ {
struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
int err; int err;
err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); list_add(&fib6_entry->common.list, &fib_node->entry_list);
if (err)
return err;
err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
if (err) if (err)
...@@ -5512,7 +5401,7 @@ static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, ...@@ -5512,7 +5401,7 @@ static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
return 0; return 0;
err_fib_node_entry_add: err_fib_node_entry_add:
mlxsw_sp_fib6_node_list_remove(fib6_entry); list_del(&fib6_entry->common.list);
return err; return err;
} }
...@@ -5521,7 +5410,7 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, ...@@ -5521,7 +5410,7 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry) struct mlxsw_sp_fib6_entry *fib6_entry)
{ {
mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
mlxsw_sp_fib6_node_list_remove(fib6_entry); list_del(&fib6_entry->common.list);
} }
static struct mlxsw_sp_fib6_entry * static struct mlxsw_sp_fib6_entry *
...@@ -5557,15 +5446,15 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, ...@@ -5557,15 +5446,15 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
} }
static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry, struct mlxsw_sp_fib6_entry *fib6_entry)
bool replace)
{ {
struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
struct mlxsw_sp_fib6_entry *replaced; struct mlxsw_sp_fib6_entry *replaced;
if (!replace) if (list_is_singular(&fib_node->entry_list))
return; return;
/* We inserted the new entry before replaced one */
replaced = list_next_entry(fib6_entry, common.list); replaced = list_next_entry(fib6_entry, common.list);
mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
...@@ -5573,9 +5462,9 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, ...@@ -5573,9 +5462,9 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
} }
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
struct fib6_info **rt_arr, struct fib6_info **rt_arr,
unsigned int nrt6, bool replace) unsigned int nrt6)
{ {
struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib_node *fib_node;
...@@ -5599,18 +5488,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, ...@@ -5599,18 +5488,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(fib_node)) if (IS_ERR(fib_node))
return PTR_ERR(fib_node); return PTR_ERR(fib_node);
/* Before creating a new entry, try to append route to an existing
* multipath entry.
*/
fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
if (fib6_entry) {
err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
rt_arr, nrt6);
if (err)
goto err_fib6_entry_nexthop_add;
return 0;
}
fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
nrt6); nrt6);
if (IS_ERR(fib6_entry)) { if (IS_ERR(fib6_entry)) {
...@@ -5618,17 +5495,61 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, ...@@ -5618,17 +5495,61 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
goto err_fib6_entry_create; goto err_fib6_entry_create;
} }
err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry);
if (err) if (err)
goto err_fib6_node_entry_link; goto err_fib6_node_entry_link;
mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry);
return 0; return 0;
err_fib6_node_entry_link: err_fib6_node_entry_link:
mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create: err_fib6_entry_create:
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
return err;
}
static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
struct fib6_info **rt_arr,
unsigned int nrt6)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
struct fib6_info *rt = rt_arr[0];
int err;
if (mlxsw_sp->router->aborted)
return 0;
if (rt->fib6_src.plen)
return -EINVAL;
if (mlxsw_sp_fib6_rt_should_ignore(rt))
return 0;
fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
&rt->fib6_dst.addr,
sizeof(rt->fib6_dst.addr),
rt->fib6_dst.plen,
MLXSW_SP_L3_PROTO_IPV6);
if (IS_ERR(fib_node))
return PTR_ERR(fib_node);
if (WARN_ON_ONCE(list_empty(&fib_node->entry_list))) {
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
return -EINVAL;
}
fib6_entry = list_first_entry(&fib_node->entry_list,
struct mlxsw_sp_fib6_entry, common.list);
err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
nrt6);
if (err)
goto err_fib6_entry_nexthop_add;
return 0;
err_fib6_entry_nexthop_add: err_fib6_entry_nexthop_add:
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
return err; return err;
...@@ -6039,20 +5960,24 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) ...@@ -6039,20 +5960,24 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
struct mlxsw_sp_fib_event_work *fib_work = struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work); container_of(work, struct mlxsw_sp_fib_event_work, work);
struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
bool replace;
int err; int err;
rtnl_lock(); rtnl_lock();
mlxsw_sp_span_respin(mlxsw_sp); mlxsw_sp_span_respin(mlxsw_sp);
switch (fib_work->event) { switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_ADD: err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
err = mlxsw_sp_router_fib6_add(mlxsw_sp,
fib_work->fib6_work.rt_arr, fib_work->fib6_work.rt_arr,
fib_work->fib6_work.nrt6, fib_work->fib6_work.nrt6);
replace); if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp);
mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
break;
case FIB_EVENT_ENTRY_APPEND:
err = mlxsw_sp_router_fib6_append(mlxsw_sp,
fib_work->fib6_work.rt_arr,
fib_work->fib6_work.nrt6);
if (err) if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp); mlxsw_sp_router_fib_abort(mlxsw_sp);
mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
...@@ -6144,7 +6069,7 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, ...@@ -6144,7 +6069,7 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
switch (fib_work->event) { switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_DEL: case FIB_EVENT_ENTRY_DEL:
fen6_info = container_of(info, struct fib6_entry_notifier_info, fen6_info = container_of(info, struct fib6_entry_notifier_info,
info); info);
...@@ -6248,7 +6173,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, ...@@ -6248,7 +6173,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
router->mlxsw_sp); router->mlxsw_sp);
return notifier_from_errno(err); return notifier_from_errno(err);
case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND:
if (router->aborted) { if (router->aborted) {
NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
return notifier_from_errno(-EINVAL); return notifier_from_errno(-EINVAL);
......
...@@ -178,7 +178,6 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, ...@@ -178,7 +178,6 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
break; break;
case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL: case FIB_EVENT_ENTRY_DEL:
err = nsim_fib_event(data, info, event != FIB_EVENT_ENTRY_DEL); err = nsim_fib_event(data, info, event != FIB_EVENT_ENTRY_DEL);
break; break;
......
...@@ -487,6 +487,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net, ...@@ -487,6 +487,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
struct fib6_info *rt, struct fib6_info *rt,
unsigned int nsiblings, unsigned int nsiblings,
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt);
void fib6_rt_update(struct net *net, struct fib6_info *rt, void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct nl_info *info); struct nl_info *info);
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
......
...@@ -370,6 +370,21 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, ...@@ -370,6 +370,21 @@ static int call_fib6_entry_notifier(struct notifier_block *nb,
return call_fib6_notifier(nb, event_type, &info.info); return call_fib6_notifier(nb, event_type, &info.info);
} }
static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib6_info *rt,
unsigned int nsiblings,
struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
.info.extack = extack,
.rt = rt,
.nsiblings = nsiblings,
};
return call_fib6_notifier(nb, event_type, &info.info);
}
int call_fib6_entry_notifiers(struct net *net, int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type, enum fib_event_type event_type,
struct fib6_info *rt, struct fib6_info *rt,
...@@ -400,6 +415,17 @@ int call_fib6_multipath_entry_notifiers(struct net *net, ...@@ -400,6 +415,17 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
return call_fib6_notifiers(net, event_type, &info.info); return call_fib6_notifiers(net, event_type, &info.info);
} }
int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
{
struct fib6_entry_notifier_info info = {
.rt = rt,
.nsiblings = rt->fib6_nsiblings,
};
rt->fib6_table->fib_seq++;
return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
}
struct fib6_dump_arg { struct fib6_dump_arg {
struct net *net; struct net *net;
struct notifier_block *nb; struct notifier_block *nb;
...@@ -408,22 +434,29 @@ struct fib6_dump_arg { ...@@ -408,22 +434,29 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{ {
if (rt == arg->net->ipv6.fib6_null_entry) enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
int err;
if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0; return 0;
return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD,
rt, arg->extack); if (rt->fib6_nsiblings)
err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
rt,
rt->fib6_nsiblings,
arg->extack);
else
err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
arg->extack);
return err;
} }
static int fib6_node_dump(struct fib6_walker *w) static int fib6_node_dump(struct fib6_walker *w)
{ {
struct fib6_info *rt; int err;
int err = 0;
for_each_fib6_walker_rt(w) { err = fib6_rt_dump(w->leaf, w->args);
err = fib6_rt_dump(rt, w->args);
if (err)
break;
}
w->leaf = NULL; w->leaf = NULL;
return err; return err;
} }
...@@ -1039,6 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1039,6 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE)); (info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0; int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
bool notify_sibling_rt = false;
u16 nlflags = NLM_F_EXCL; u16 nlflags = NLM_F_EXCL;
int err; int err;
...@@ -1130,6 +1164,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1130,6 +1164,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
/* Find the first route that have the same metric */ /* Find the first route that have the same metric */
sibling = leaf; sibling = leaf;
notify_sibling_rt = true;
while (sibling) { while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric && if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) { rt6_qualify_for_ecmp(sibling)) {
...@@ -1139,6 +1174,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1139,6 +1174,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
} }
sibling = rcu_dereference_protected(sibling->fib6_next, sibling = rcu_dereference_protected(sibling->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock)); lockdep_is_held(&rt->fib6_table->tb6_lock));
notify_sibling_rt = false;
} }
/* For each sibling in the list, increment the counter of /* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings * siblings. BUG() if counters does not match, list of siblings
...@@ -1165,10 +1201,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1165,10 +1201,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add: add:
nlflags |= NLM_F_CREATE; nlflags |= NLM_F_CREATE;
if (!info->skip_notify_kernel) { /* The route should only be notified if it is the first
* route in the node or if it is added as a sibling
* route to the first route in the node.
*/
if (!info->skip_notify_kernel &&
(notify_sibling_rt || ins == &fn->leaf)) {
enum fib_event_type fib_event;
if (notify_sibling_rt)
fib_event = FIB_EVENT_ENTRY_APPEND;
else
fib_event = FIB_EVENT_ENTRY_REPLACE;
err = call_fib6_entry_notifiers(info->nl_net, err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_ADD, fib_event, rt,
rt, extack); extack);
if (err) { if (err) {
struct fib6_info *sibling, *next_sibling; struct fib6_info *sibling, *next_sibling;
...@@ -1212,7 +1259,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1212,7 +1259,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
return -ENOENT; return -ENOENT;
} }
if (!info->skip_notify_kernel) { if (!info->skip_notify_kernel && ins == &fn->leaf) {
err = call_fib6_entry_notifiers(info->nl_net, err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE, FIB_EVENT_ENTRY_REPLACE,
rt, extack); rt, extack);
...@@ -1845,13 +1892,29 @@ static struct fib6_node *fib6_repair_tree(struct net *net, ...@@ -1845,13 +1892,29 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct fib6_info __rcu **rtp, struct nl_info *info) struct fib6_info __rcu **rtp, struct nl_info *info)
{ {
struct fib6_info *leaf, *replace_rt = NULL;
struct fib6_walker *w; struct fib6_walker *w;
struct fib6_info *rt = rcu_dereference_protected(*rtp, struct fib6_info *rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock)); lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net; struct net *net = info->nl_net;
bool notify_del = false;
RT6_TRACE("fib6_del_route\n"); RT6_TRACE("fib6_del_route\n");
/* If the deleted route is the first in the node and it is not part of
* a multipath route, then we need to replace it with the next route
* in the node, if exists.
*/
leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&table->tb6_lock));
if (leaf == rt && !rt->fib6_nsiblings) {
if (rcu_access_pointer(rt->fib6_next))
replace_rt = rcu_dereference_protected(rt->fib6_next,
lockdep_is_held(&table->tb6_lock));
else
notify_del = true;
}
/* Unlink it */ /* Unlink it */
*rtp = rt->fib6_next; *rtp = rt->fib6_next;
rt->fib6_node = NULL; rt->fib6_node = NULL;
...@@ -1869,6 +1932,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, ...@@ -1869,6 +1932,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
if (rt->fib6_nsiblings) { if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling; struct fib6_info *sibling, *next_sibling;
/* The route is deleted from a multipath route. If this
* multipath route is the first route in the node, then we need
* to emit a delete notification. Otherwise, we need to skip
* the notification.
*/
if (rt->fib6_metric == leaf->fib6_metric &&
rt6_qualify_for_ecmp(leaf))
notify_del = true;
list_for_each_entry_safe(sibling, next_sibling, list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) &rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--; sibling->fib6_nsiblings--;
...@@ -1904,8 +1975,13 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, ...@@ -1904,8 +1975,13 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net); fib6_purge_rt(rt, fn, net);
if (!info->skip_notify_kernel) if (!info->skip_notify_kernel) {
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (notify_del)
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
rt, NULL);
else if (replace_rt)
call_fib6_entry_notifiers_replace(net, replace_rt);
}
if (!info->skip_notify) if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0); inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
......
...@@ -3749,6 +3749,7 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) ...@@ -3749,6 +3749,7 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
struct fib6_info *sibling, *next_sibling; struct fib6_info *sibling, *next_sibling;
struct fib6_node *fn;
/* prefer to send a single notification with all hops */ /* prefer to send a single notification with all hops */
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
...@@ -3764,12 +3765,32 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) ...@@ -3764,12 +3765,32 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
info->skip_notify = 1; info->skip_notify = 1;
} }
/* 'rt' points to the first sibling route. If it is not the
* leaf, then we do not need to send a notification. Otherwise,
* we need to check if the last sibling has a next route or not
* and emit a replace or delete notification, respectively.
*/
info->skip_notify_kernel = 1; info->skip_notify_kernel = 1;
fn = rcu_dereference_protected(rt->fib6_node,
lockdep_is_held(&table->tb6_lock));
if (rcu_access_pointer(fn->leaf) == rt) {
struct fib6_info *last_sibling, *replace_rt;
last_sibling = list_last_entry(&rt->fib6_siblings,
struct fib6_info,
fib6_siblings);
replace_rt = rcu_dereference_protected(
last_sibling->fib6_next,
lockdep_is_held(&table->tb6_lock));
if (replace_rt)
call_fib6_entry_notifiers_replace(net,
replace_rt);
else
call_fib6_multipath_entry_notifiers(net, call_fib6_multipath_entry_notifiers(net,
FIB_EVENT_ENTRY_DEL, FIB_EVENT_ENTRY_DEL,
rt, rt, rt->fib6_nsiblings,
rt->fib6_nsiblings,
NULL); NULL);
}
list_for_each_entry_safe(sibling, next_sibling, list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, &rt->fib6_siblings,
fib6_siblings) { fib6_siblings) {
...@@ -5017,12 +5038,37 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, ...@@ -5017,12 +5038,37 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
} }
static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
{
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
bool should_notify = false;
struct fib6_info *leaf;
struct fib6_node *fn;
rcu_read_lock();
fn = rcu_dereference(rt->fib6_node);
if (!fn)
goto out;
leaf = rcu_dereference(fn->leaf);
if (!leaf)
goto out;
if (rt == leaf ||
(rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
rt6_qualify_for_ecmp(leaf)))
should_notify = true;
out:
rcu_read_unlock();
return should_notify;
}
static int ip6_route_multipath_add(struct fib6_config *cfg, static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo; struct nl_info *info = &cfg->fc_nlinfo;
enum fib_event_type event_type;
struct fib6_config r_cfg; struct fib6_config r_cfg;
struct rtnexthop *rtnh; struct rtnexthop *rtnh;
struct fib6_info *rt; struct fib6_info *rt;
...@@ -5147,14 +5193,28 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, ...@@ -5147,14 +5193,28 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nhn++; nhn++;
} }
event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD; /* An in-kernel notification should only be sent in case the new
err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type, * multipath route is added as the first route in the node, or if
rt_notif, nhn - 1, extack); * it was appended to it. We pass 'rt_notif' since it is the first
* sibling and might allow us to skip some checks in the replace case.
*/
if (ip6_route_mpath_should_notify(rt_notif)) {
enum fib_event_type fib_event;
if (rt_notif->fib6_nsiblings != nhn - 1)
fib_event = FIB_EVENT_ENTRY_APPEND;
else
fib_event = FIB_EVENT_ENTRY_REPLACE;
err = call_fib6_multipath_entry_notifiers(info->nl_net,
fib_event, rt_notif,
nhn - 1, extack);
if (err) { if (err) {
/* Delete all the siblings that were just added */ /* Delete all the siblings that were just added */
err_nh = NULL; err_nh = NULL;
goto add_errout; goto add_errout;
} }
}
/* success ... tell user about new route */ /* success ... tell user about new route */
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment