Commit eb9da2c1 authored by David S. Miller

Merge branch 'mlxsw-resilient-nh-groups'

Ido Schimmel says:

====================
mlxsw: Add support for resilient nexthop groups

This patchset adds support for resilient nexthop groups in mlxsw. As far
as the hardware is concerned, resilient groups are the same as regular
groups. The differences lie in how mlxsw manages the individual
adjacency entries (nexthop buckets) that make up the group.

The first difference is that unlike regular groups the driver needs to
periodically update the kernel about activity of nexthop buckets so that
the kernel will not treat the buckets as idle, given traffic is
offloaded from the CPU to the ASIC. This is similar to what mlxsw is
already doing with respect to neighbour entries. The update interval is
set to 1 second to allow for short idle timers.

The second difference is that nexthop buckets that correspond to an
unresolved neighbour must be programmed to the device, as the size of
the group must remain fixed. This is achieved by programming such
entries with trap action, in order to trigger neighbour resolution by
the kernel.

The third difference is atomic replacement of individual nexthop
buckets. While the driver periodically updates the kernel about activity
of nexthop buckets, it is possible for a bucket to become active just
before the kernel decides to replace it with a different nexthop. To
avoid such situations and connections being reset, the driver instructs
the device to only replace an adjacency entry if it is inactive.
Failures are propagated back to the nexthop code.

Patchset overview:

Patches #1-#7 gradually add support for resilient nexthop groups

Patch #8 finally enables such groups to be programmed to the device

Patches #9-#10 add mlxsw-specific selftests
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3c85a8b8 ffd3e9b0
......@@ -8130,6 +8130,60 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);
}
/* RATRAD - Router Adjacency Table Activity Dump Register
* ------------------------------------------------------
* The RATRAD register is used to dump and optionally clear activity bits of
* router adjacency table entries.
*/
#define MLXSW_REG_RATRAD_ID 0x8022
#define MLXSW_REG_RATRAD_LEN 0x210
MLXSW_REG_DEFINE(ratrad, MLXSW_REG_RATRAD_ID, MLXSW_REG_RATRAD_LEN);
/* Operations that can be written to the RATRAD 'op' field. */
enum {
	/* Read activity */
	MLXSW_REG_RATRAD_OP_READ_ACTIVITY = 0,
	/* Read and clear activity */
	MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY = 1,
};
/* reg_ratrad_op
* Access: Operation
*/
MLXSW_ITEM32(reg, ratrad, op, 0x00, 30, 2);
/* reg_ratrad_ecmp_size
* ecmp_size is the amount of sequential entries from adjacency_index. Valid
* ranges:
* Spectrum-1: 32-64, 512, 1024, 2048, 4096
* Spectrum-2/3: 32-128, 256, 512, 1024, 2048, 4096
* Access: Index
*/
MLXSW_ITEM32(reg, ratrad, ecmp_size, 0x00, 0, 13);
/* reg_ratrad_adjacency_index
* Index into the adjacency table.
* Access: Index
*/
MLXSW_ITEM32(reg, ratrad, adjacency_index, 0x04, 0, 24);
/* reg_ratrad_activity_vector
* Activity bit per adjacency index.
* Bits higher than ecmp_size are reserved.
* Access: RO
*/
MLXSW_ITEM_BIT_ARRAY(reg, ratrad, activity_vector, 0x10, 0x200, 1);
/* Build a RATRAD payload that reads and clears the activity bits of
 * 'ecmp_size' sequential adjacency entries starting at 'adjacency_index'.
 * The payload is zeroed first, so the activity vector starts out clear.
 */
static inline void mlxsw_reg_ratrad_pack(char *payload, u32 adjacency_index,
					 u16 ecmp_size)
{
	MLXSW_REG_ZERO(ratrad, payload);
	mlxsw_reg_ratrad_adjacency_index_set(payload, adjacency_index);
	mlxsw_reg_ratrad_ecmp_size_set(payload, ecmp_size);
	mlxsw_reg_ratrad_op_set(payload,
				MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY);
}
/* RIGR-V2 - Router Interface Group Register Version 2
* ---------------------------------------------------
* The RIGR_V2 register is used to add, remove and query egress interface list
......@@ -12114,6 +12168,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(rtar),
MLXSW_REG(ratr),
MLXSW_REG(rtdp),
MLXSW_REG(ratrad),
MLXSW_REG(rdpm),
MLXSW_REG(ricnt),
MLXSW_REG(rrcr),
......
......@@ -1178,6 +1178,7 @@ mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled,
static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
{
char ratr_pl[MLXSW_REG_RATR_LEN];
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_nexthop *nh;
u32 adj_hash_index = 0;
......@@ -1196,7 +1197,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
else
mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
mlxsw_sp_nexthop_eth_update(mlxsw_sp,
adj_index + adj_hash_index, nh);
adj_index + adj_hash_index, nh,
true, ratr_pl);
}
return 0;
}
......
......@@ -127,14 +127,16 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
static int
mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
struct mlxsw_sp_ipip_entry *ipip_entry,
bool force, char *ratr_pl)
{
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
__be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
char ratr_pl[MLXSW_REG_RATR_LEN];
enum mlxsw_reg_ratr_op op;
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
true, MLXSW_REG_RATR_TYPE_IPIP,
op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP,
adj_index, rif_index);
mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
......
......@@ -40,7 +40,8 @@ struct mlxsw_sp_ipip_ops {
enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_ipip_entry *ipip_entry);
struct mlxsw_sp_ipip_entry *ipip_entry,
bool force, char *ratr_pl);
bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev);
......
......@@ -2911,7 +2911,9 @@ struct mlxsw_sp_nexthop_group_info {
u16 count;
int sum_norm_weight;
u8 adj_index_valid:1,
gateway:1; /* routes using the group use a gateway */
gateway:1, /* routes using the group use a gateway */
is_resilient:1;
struct list_head list; /* member in nh_res_grp_list */
struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif nexthops[0].rif
};
......@@ -3418,16 +3420,18 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
u32 adj_index,
struct mlxsw_sp_nexthop *nh)
struct mlxsw_sp_nexthop *nh,
bool force, char *ratr_pl)
{
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
char ratr_pl[MLXSW_REG_RATR_LEN];
enum mlxsw_reg_ratr_op op;
u16 rif_index;
rif_index = nh->rif ? nh->rif->rif_index :
mlxsw_sp->router->lb_rif_index;
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
true, MLXSW_REG_RATR_TYPE_ETHERNET,
op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
adj_index, rif_index);
switch (nh->action) {
case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
......@@ -3455,7 +3459,8 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
}
int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh)
struct mlxsw_sp_nexthop *nh, bool force,
char *ratr_pl)
{
int i;
......@@ -3463,7 +3468,7 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
int err;
err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
nh);
nh, force, ratr_pl);
if (err)
return err;
}
......@@ -3473,17 +3478,20 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
u32 adj_index,
struct mlxsw_sp_nexthop *nh)
struct mlxsw_sp_nexthop *nh,
bool force, char *ratr_pl)
{
const struct mlxsw_sp_ipip_ops *ipip_ops;
ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
force, ratr_pl);
}
static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
u32 adj_index,
struct mlxsw_sp_nexthop *nh)
struct mlxsw_sp_nexthop *nh, bool force,
char *ratr_pl)
{
int i;
......@@ -3491,7 +3499,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
int err;
err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
nh);
nh, force, ratr_pl);
if (err)
return err;
}
......@@ -3500,7 +3508,8 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
}
static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh)
struct mlxsw_sp_nexthop *nh, bool force,
char *ratr_pl)
{
/* When action is discard or trap, the nexthop must be
* programmed as an Ethernet nexthop.
......@@ -3508,9 +3517,11 @@ static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh);
return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
force, ratr_pl);
else
return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh);
return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
force, ratr_pl);
}
static int
......@@ -3518,6 +3529,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group_info *nhgi,
bool reallocate)
{
char ratr_pl[MLXSW_REG_RATR_LEN];
u32 adj_index = nhgi->adj_index; /* base */
struct mlxsw_sp_nexthop *nh;
int i;
......@@ -3533,7 +3545,8 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
if (nh->update || reallocate) {
int err = 0;
err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh);
err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
true, ratr_pl);
if (err)
return err;
nh->update = 0;
......@@ -3750,10 +3763,30 @@ mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
}
/* Report the hardware state of one nexthop bucket to the nexthop code.
 * A bucket that is not offloaded gets neither flag. An offloaded bucket is
 * flagged as 'trap' when its action is trap and as 'offload' otherwise.
 */
static void
mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
					const struct mlxsw_sp_nexthop *nh,
					u16 bucket_index)
{
	bool in_hw = nh->offloaded;
	bool trap = in_hw && nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP;
	bool offload = in_hw && !trap;

	nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp),
				    nh->nhgi->nh_grp->obj.id, bucket_index,
				    offload, trap);
}
static void
mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
int i;
/* Do not update the flags if the nexthop group is being destroyed
* since:
* 1. The nexthop objects is being deleted, in which case the flags are
......@@ -3767,6 +3800,18 @@ mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
nh_grp->nhgi->adj_index_valid, false);
/* Update flags of individual nexthop buckets in case of a resilient
* nexthop group.
*/
if (!nh_grp->nhgi->is_resilient)
return;
for (i = 0; i < nh_grp->nhgi->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
}
}
static void
......@@ -3820,6 +3865,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
goto set_trap;
}
/* Flags of individual nexthop buckets might need to be
* updated.
*/
mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
return 0;
}
mlxsw_sp_nexthop_group_normalize(nhgi);
......@@ -3905,6 +3954,9 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
if (!removing) {
nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
nh->should_offload = 1;
} else if (nh->nhgi->is_resilient) {
nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
nh->should_offload = 1;
} else {
nh->should_offload = 0;
}
......@@ -4322,6 +4374,85 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
}
}
static void
mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_nexthop_group *nh_grp,
unsigned long *activity)
{
char *ratrad_pl;
int i, err;
ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
if (!ratrad_pl)
return;
mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
nh_grp->nhgi->count);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
if (err)
goto out;
for (i = 0; i < nh_grp->nhgi->count; i++) {
if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
continue;
bitmap_set(activity, i, 1);
}
out:
kfree(ratrad_pl);
}
#define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
static void
mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_nexthop_group *nh_grp)
{
unsigned long *activity;
activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
if (!activity)
return;
mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
nh_grp->nhgi->count, activity);
bitmap_free(activity);
}
static void
mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
msecs_to_jiffies(interval));
}
/* Delayed work: under the router lock, push the bucket activity of every
 * resilient nexthop group on nh_res_grp_list to the nexthop code.
 */
static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
{
	struct mlxsw_sp_router *router = container_of(work,
						      struct mlxsw_sp_router,
						      nh_grp_activity_dw.work);
	struct mlxsw_sp_nexthop_group_info *nhgi;
	bool have_groups = false;

	mutex_lock(&router->lock);
	list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
		have_groups = true;
		mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
	}
	mutex_unlock(&router->lock);

	/* Only re-arm while there is at least one resilient group to poll. */
	if (have_groups)
		mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
}
static int
mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
const struct nh_notifier_single_info *nh,
......@@ -4388,11 +4519,86 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
return 0;
}
static int
mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
const struct nh_notifier_res_table_info *nh_res_table,
struct netlink_ext_ack *extack)
{
unsigned int alloc_size;
bool valid_size = false;
int err, i;
if (nh_res_table->num_nh_buckets < 32) {
NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
return -EINVAL;
}
for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
const struct mlxsw_sp_adj_grp_size_range *size_range;
size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
if (nh_res_table->num_nh_buckets >= size_range->start &&
nh_res_table->num_nh_buckets <= size_range->end) {
valid_size = true;
break;
}
}
if (!valid_size) {
NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
return -EINVAL;
}
err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
nh_res_table->num_nh_buckets,
&alloc_size);
if (err || nh_res_table->num_nh_buckets != alloc_size) {
NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
return -EINVAL;
}
return 0;
}
/* Validate a resilient nexthop group before it is offloaded: first the
 * number of buckets, then the nexthop that populates each bucket.
 *
 * Returns 0 if the whole table is acceptable, or the first error reported by
 * one of the validation helpers.
 */
static int
mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
					const struct nh_notifier_res_table_info *nh_res_table,
					struct netlink_ext_ack *extack)
{
	int err;
	u16 i;

	err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
							   nh_res_table,
							   extack);
	if (err)
		return err;

	for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
		const struct nh_notifier_single_info *nh;

		nh = &nh_res_table->nhs[i];
		/* Reuse the function-scope 'err' instead of shadowing it. */
		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
								extack);
		if (err)
			return err;
	}

	return 0;
}
static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
unsigned long event,
struct nh_notifier_info *info)
{
if (event != NEXTHOP_EVENT_REPLACE)
struct nh_notifier_single_info *nh;
if (event != NEXTHOP_EVENT_REPLACE &&
event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
event != NEXTHOP_EVENT_BUCKET_REPLACE)
return 0;
switch (info->type) {
......@@ -4403,6 +4609,14 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
info->nh_grp,
info->extack);
case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
info->nh_res_table,
info->extack);
case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
nh = &info->nh_res_bucket->new_nh;
return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
info->extack);
default:
NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
return -EOPNOTSUPP;
......@@ -4420,6 +4634,7 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
return info->nh->gw_family || info->nh->is_reject ||
mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
case NH_NOTIFIER_INFO_TYPE_GRP:
case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
/* Already validated earlier. */
return true;
default:
......@@ -4484,6 +4699,15 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
if (nh_obj->is_reject)
mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
/* In a resilient nexthop group, all the nexthops must be written to
* the adjacency table. Even if they do not have a valid neighbour or
* RIF.
*/
if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
nh->should_offload = 1;
}
return 0;
err_type_init:
......@@ -4500,6 +4724,7 @@ static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
nh->should_offload = 0;
}
static int
......@@ -4509,6 +4734,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_nexthop_group_info *nhgi;
struct mlxsw_sp_nexthop *nh;
bool is_resilient = false;
unsigned int nhs;
int err, i;
......@@ -4519,6 +4745,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
case NH_NOTIFIER_INFO_TYPE_GRP:
nhs = info->nh_grp->num_nh;
break;
case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
nhs = info->nh_res_table->num_nh_buckets;
is_resilient = true;
break;
default:
return -EINVAL;
}
......@@ -4529,6 +4759,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
nh_grp->nhgi = nhgi;
nhgi->nh_grp = nh_grp;
nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
nhgi->is_resilient = is_resilient;
nhgi->count = nhs;
for (i = 0; i < nhgi->count; i++) {
struct nh_notifier_single_info *nh_obj;
......@@ -4544,6 +4775,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
nh_obj = &info->nh_grp->nh_entries[i].nh;
weight = info->nh_grp->nh_entries[i].weight;
break;
case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
nh_obj = &info->nh_res_table->nhs[i];
weight = 1;
break;
default:
err = -EINVAL;
goto err_nexthop_obj_init;
......@@ -4559,6 +4794,15 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
goto err_group_refresh;
}
/* Add resilient nexthop groups to a list so that the activity of their
* nexthop buckets will be periodically queried and cleared.
*/
if (nhgi->is_resilient) {
if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
}
return 0;
err_group_refresh:
......@@ -4577,8 +4821,15 @@ mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
struct mlxsw_sp_router *router = mlxsw_sp->router;
int i;
if (nhgi->is_resilient) {
list_del(&nhgi->list);
if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
cancel_delayed_work(&router->nh_grp_activity_dw);
}
for (i = nhgi->count - 1; i >= 0; i--) {
struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
......@@ -4771,6 +5022,135 @@ static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
}
/* Read back the adjacency entry at 'adj_index' into 'ratr_pl'.
 * Returns the result of the RATR register query.
 */
static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
					     u32 adj_index, char *ratr_pl)
{
	MLXSW_REG_ZERO(ratr, ratr_pl);
	/* The adjacency index is split across a low and a high field. */
	mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
	mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);

	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
/* Compare the payload that was written for a bucket ('ratr_pl') with the
 * payload read back from the device ('ratr_pl_new'). Both buffers are
 * modified: their opcode and activity fields are normalized first.
 *
 * Returns 0 if the entries match, -EINVAL otherwise.
 */
static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
{
	/* The opcode and the activity bit legitimately differ between the
	 * written and the queried payload, so neutralize them on both sides.
	 */
	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
	mlxsw_reg_ratr_a_set(ratr_pl, 0);
	mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
	mlxsw_reg_ratr_a_set(ratr_pl_new, 0);

	/* An entry identical to what was requested means the replacement
	 * took effect.
	 */
	return memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN) ? -EINVAL : 0;
}
static int
mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh,
struct nh_notifier_info *info)
{
u16 bucket_index = info->nh_res_bucket->bucket_index;
struct netlink_ext_ack *extack = info->extack;
bool force = info->nh_res_bucket->force;
char ratr_pl_new[MLXSW_REG_RATR_LEN];
char ratr_pl[MLXSW_REG_RATR_LEN];
u32 adj_index;
int err;
/* No point in trying an atomic replacement if the idle timer interval
* is smaller than the interval in which we query and clear activity.
*/
force = info->nh_res_bucket->idle_timer_ms <
MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
adj_index = nh->nhgi->adj_index + bucket_index;
err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
return err;
}
if (!force) {
err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
ratr_pl_new);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
return err;
}
err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
return err;
}
}
nh->update = 0;
nh->offloaded = 1;
mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
return 0;
}
/* Handle the replacement of one bucket of a resilient nexthop group.
 *
 * The nexthop currently occupying the bucket is torn down, re-initialized
 * from the new nexthop in the notification and written to the adjacency
 * table. On failure the bucket is re-initialized from the old nexthop and
 * marked as offloaded, since the old adjacency entry is still in place.
 *
 * Returns 0 on success, -EINVAL if the group is unknown or the bucket index
 * is out of range, or the error of the failing step.
 */
static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
					       struct nh_notifier_info *info)
{
	u16 bucket_index = info->nh_res_bucket->bucket_index;
	struct netlink_ext_ack *extack = info->extack;
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	int err;

	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
	if (!nh_grp) {
		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
		return -EINVAL;
	}

	if (bucket_index >= nh_grp->nhgi->count) {
		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
		return -EINVAL;
	}

	nh = &nh_grp->nhgi->nexthops[bucket_index];
	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);

	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh,
					&info->nh_res_bucket->new_nh, 1);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
		goto restore_old_nh;
	}

	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
	if (err)
		goto undo_new_nh;

	return 0;

undo_new_nh:
	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
restore_old_nh:
	/* Best-effort rollback; the result of the re-initialization is
	 * deliberately not checked.
	 */
	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh,
				  &info->nh_res_bucket->old_nh, 1);
	/* The old adjacency entry was not overwritten */
	nh->update = 0;
	nh->offloaded = 1;
	return err;
}
static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
......@@ -4792,6 +5172,10 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
case NEXTHOP_EVENT_DEL:
mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
break;
case NEXTHOP_EVENT_BUCKET_REPLACE:
err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
info);
break;
default:
break;
}
......@@ -9446,6 +9830,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_ll_op_ctx_init;
INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
mlxsw_sp_nh_grp_activity_work);
INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
err = __mlxsw_sp_router_init(mlxsw_sp);
if (err)
......@@ -9569,6 +9957,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
err_rifs_init:
__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
mlxsw_sp_router_ll_op_ctx_fini(router);
err_ll_op_ctx_init:
mlxsw_sp_router_xm_fini(mlxsw_sp);
......@@ -9600,6 +9989,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp_ipips_fini(mlxsw_sp);
mlxsw_sp_rifs_fini(mlxsw_sp);
__mlxsw_sp_router_fini(mlxsw_sp);
cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router);
mlxsw_sp_router_xm_fini(mlxsw_sp);
mutex_destroy(&mlxsw_sp->router->lock);
......
......@@ -80,6 +80,8 @@ struct mlxsw_sp_router {
struct mlxsw_sp_router_xm *xm;
const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges;
size_t adj_grp_size_ranges_count;
struct delayed_work nh_grp_activity_dw;
struct list_head nh_res_grp_list;
};
struct mlxsw_sp_fib_entry_priv {
......@@ -209,7 +211,8 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh, u64 *p_counter);
int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh);
struct mlxsw_sp_nexthop *nh, bool force,
char *ratr_pl);
void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh);
void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
......
......@@ -446,6 +446,35 @@ __invalid_nexthop_test()
log_test "Unresolved neigh: nexthop does not exist: $desc"
}
__invalid_nexthop_bucket_test()
{
	local desc=$1; shift
	local dip=$1; shift
	local via_add=$1; shift
	local trap_name="unresolved_neigh"

	RET=0

	# A route whose resilient group is backed by a nexthop that does not
	# exist must trigger the unresolved_neigh trap.
	ip nexthop add id 1 via $via_add dev $rp2
	ip nexthop add id 10 group 1 type resilient buckets 32
	ip route add $dip nhid 10

	# Sample the trap counter around the ping; it has to move.
	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
	ping_do $h1 $dip
	t1_packets=$(devlink_trap_rx_packets_get $trap_name)

	[[ $t0_packets -eq $t1_packets ]] && \
		check_err 1 "Trap counter did not increase"

	ip route del $dip nhid 10
	ip nexthop del id 10
	ip nexthop del id 1

	log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
}
unresolved_neigh_test()
{
__host_miss_test "IPv4" 198.51.100.1
......@@ -453,6 +482,8 @@ unresolved_neigh_test()
__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
2001:db8:2::4
__invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
__invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
}
vrf_without_routes_create()
......
......@@ -33,6 +33,7 @@ ALL_TESTS="
nexthop_obj_invalid_test
nexthop_obj_offload_test
nexthop_obj_group_offload_test
nexthop_obj_bucket_offload_test
nexthop_obj_blackhole_offload_test
nexthop_obj_route_offload_test
devlink_reload_test
......@@ -739,11 +740,28 @@ nexthop_obj_invalid_test()
ip nexthop add id 1 dev $swp1
ip nexthop add id 2 dev $swp1
ip nexthop add id 3 via 192.0.2.3 dev $swp1
ip nexthop add id 10 group 1/2
check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
ip nexthop add id 10 group 3 type resilient buckets 7
check_fail $? "managed to configure a too small resilient nexthop group when should not"
ip nexthop add id 10 group 3 type resilient buckets 129
check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
ip nexthop add id 10 group 1/2 type resilient buckets 32
check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
ip nexthop add id 10 group 3 type resilient buckets 32
check_err $? "failed to configure a valid resilient nexthop group"
ip nexthop replace id 3 dev $swp1
check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
log_test "nexthop objects - invalid configurations"
ip nexthop del id 10
ip nexthop del id 3
ip nexthop del id 2
ip nexthop del id 1
......@@ -858,6 +876,70 @@ nexthop_obj_group_offload_test()
simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
}
nexthop_obj_bucket_offload_test()
{
	# Verify the offload / trap indication reported for the buckets of a
	# resilient nexthop group
	RET=0

	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
	simple_if_init $swp2
	setup_wait

	ip nexthop add id 1 via 192.0.2.2 dev $swp1
	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
	ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0

	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 \
		nud reachable dev $swp1
	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 \
		nud reachable dev $swp1
	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 \
		nud reachable dev $swp1

	# Buckets of both nexthops start out with a valid neighbour
	busywait "$TIMEOUT" wait_for_offload ip nexthop bucket show nhid 1
	check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
	busywait "$TIMEOUT" wait_for_offload ip nexthop bucket show nhid 2
	check_err $? "IPv6 nexthop buckets not marked as offloaded when should"

	# Fail the neighbour of nexthop id 1
	ip neigh replace 192.0.2.2 nud failed dev $swp1
	busywait "$TIMEOUT" wait_for_trap ip nexthop bucket show nhid 1
	check_err $? "IPv4 nexthop buckets not marked with trap when should"

	# Fail the neighbour of nexthop id 2
	ip neigh replace 2001:db8:1::2 nud failed dev $swp1
	busywait "$TIMEOUT" wait_for_trap ip nexthop bucket show nhid 2
	check_err $? "IPv6 nexthop buckets not marked with trap when should"

	# Bring nexthop id 1 back by pointing it at a different gateway
	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
	busywait "$TIMEOUT" wait_for_offload ip nexthop bucket show nhid 1
	check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"

	# Bring nexthop id 2 back by restoring its neighbour
	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 \
		nud reachable dev $swp1
	busywait "$TIMEOUT" wait_for_offload ip nexthop bucket show nhid 2
	check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"

	log_test "nexthop bucket offload indication"

	ip neigh del 2001:db8:1::2 dev $swp1
	ip neigh del 192.0.2.3 dev $swp1
	ip neigh del 192.0.2.2 dev $swp1
	ip nexthop del id 10
	ip nexthop del id 2
	ip nexthop del id 1

	simple_if_fini $swp2
	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
}
nexthop_obj_blackhole_offload_test()
{
# Test offload indication of blackhole nexthop objects
......
......@@ -353,6 +353,11 @@ wait_for_offload()
"$@" | grep -q offload
}
# Run the command given as arguments and check its output for the string
# "trap"; the result is grep's exit status.
wait_for_trap()
{
"$@" | grep -q trap
}
until_counter_is()
{
local expr=$1; shift
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment