Commit 6c446b63 authored by Jakub Kicinski

Merge branch 'mlxsw-add-support-for-blackhole-nexthops'

Ido Schimmel says:

====================
mlxsw: Add support for blackhole nexthops

This patch set adds support for blackhole nexthops in mlxsw. These
nexthops are exactly the same as other nexthops, but instead of
forwarding packets to an egress router interface (RIF), they are
programmed to silently drop them.

Patches #1-#4 are preparations.

Patch #5 adds support for blackhole nexthops and removes the check that
prevented them from being programmed.

Patch #6 adds a selftest over mlxsw which tests that blackhole nexthops
can be programmed and are marked as offloaded.

Patch #7 extends the existing nexthop forwarding test to also test
blackhole functionality.

Patches #8-#10 add support for a new packet trap ('blackhole_nexthop')
which should be triggered whenever packets are dropped by a blackhole
nexthop. Obviously, by default, the trap action is set to 'drop' so that
dropped packets will not be reported.
====================

Link: https://lore.kernel.org/r/20201123071230.676469-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 23c01ed3 84e8feea
......@@ -476,6 +476,10 @@ be added to the following table:
* - ``esp_parsing``
- ``drop``
- Traps packets dropped due to an error in the ESP header parsing
* - ``blackhole_nexthop``
- ``drop``
- Traps packets that the device decided to drop in case they hit a
blackhole nexthop
Driver-specific Packet Traps
============================
......
......@@ -913,7 +913,8 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
if (mlxsw_sp_nexthop_offload(nh) &&
!mlxsw_sp_nexthop_group_has_ipip(nh))
!mlxsw_sp_nexthop_group_has_ipip(nh) &&
!mlxsw_sp_nexthop_is_discard(nh))
size++;
return size;
}
......@@ -1105,7 +1106,8 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
nh_count = 0;
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_offload(nh) ||
mlxsw_sp_nexthop_group_has_ipip(nh))
mlxsw_sp_nexthop_group_has_ipip(nh) ||
mlxsw_sp_nexthop_is_discard(nh))
continue;
if (nh_count < nh_skip)
......@@ -1186,7 +1188,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_offload(nh) ||
mlxsw_sp_nexthop_group_has_ipip(nh))
mlxsw_sp_nexthop_group_has_ipip(nh) ||
mlxsw_sp_nexthop_is_discard(nh))
continue;
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
......
......@@ -2858,9 +2858,10 @@ struct mlxsw_sp_nexthop {
offloaded:1, /* set in case the neigh is actually put into
* KVD linear area of this group.
*/
update:1; /* set indicates that MAC of this neigh should be
update:1, /* set indicates that MAC of this neigh should be
* updated in HW
*/
discard:1; /* nexthop is programmed to discard packets */
enum mlxsw_sp_nexthop_type type;
union {
struct mlxsw_sp_neigh_entry *neigh_entry;
......@@ -3011,6 +3012,11 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
return false;
}
/* Report whether @nh has its 'discard' bit set, i.e. whether the nexthop
 * is programmed to discard packets.
 */
bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
{
	return !!nh->discard;
}
struct mlxsw_sp_nexthop_group_cmp_arg {
enum mlxsw_sp_nexthop_group_type type;
union {
......@@ -3284,8 +3290,12 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
true, MLXSW_REG_RATR_TYPE_ETHERNET,
adj_index, neigh_entry->rif);
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
adj_index, nh->rif->rif_index);
if (nh->discard)
mlxsw_reg_ratr_trap_action_set(ratr_pl,
MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
else
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
if (nh->counter_valid)
mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
else
......@@ -4128,9 +4138,7 @@ mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
{
int err = -EINVAL;
if (nh->is_reject)
NL_SET_ERR_MSG_MOD(extack, "Blackhole nexthops are not supported");
else if (nh->is_fdb)
if (nh->is_fdb)
NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
else if (nh->has_encap)
NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
......@@ -4165,7 +4173,7 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
/* Device only nexthops with an IPIP device are programmed as
* encapsulating adjacency entries.
*/
if (!nh->gw_family &&
if (!nh->gw_family && !nh->is_reject &&
!mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
return -EINVAL;
......@@ -4199,10 +4207,31 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
return true;
dev = info->nh->dev;
return info->nh->gw_family ||
return info->nh->gw_family || info->nh->is_reject ||
mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
/* Set up @nh as a blackhole nexthop: mark it as discarding, request that it
 * be offloaded and bind it to the router's loopback RIF.
 */
static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_nexthop *nh)
{
	/* A discarding nexthop never forwards packets via an egress RIF,
	 * yet it must still be programmed with a valid RIF. The loopback
	 * RIF created during init serves that purpose.
	 */
	nh->rif = mlxsw_sp->router->rifs[mlxsw_sp->router->lb_rif_index];
	nh->should_offload = 1;
	nh->discard = 1;
}
/* Undo the RIF binding and offload request of a blackhole nexthop. The
 * 'discard' bit of @nh is left untouched.
 */
static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_nexthop *nh)
{
	nh->should_offload = 0;
	nh->rif = NULL;
}
static int
mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
......@@ -4236,6 +4265,9 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_type_init;
if (nh_obj->is_reject)
mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
return 0;
err_type_init:
......@@ -4247,6 +4279,8 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
if (nh->discard)
mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
......@@ -4994,7 +5028,7 @@ int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
return err;
}
static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp)
{
enum mlxsw_reg_ratr_trap_action trap_action;
char ratr_pl[MLXSW_REG_RATR_LEN];
......@@ -5008,11 +5042,13 @@ static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
if (err)
return err;
trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS;
trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
MLXSW_REG_RATR_TYPE_ETHERNET,
mlxsw_sp->router->adj_discard_index, rif_index);
mlxsw_sp->router->adj_discard_index,
mlxsw_sp->router->lb_rif_index);
mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
if (err)
goto err_ratr_write;
......@@ -5050,8 +5086,7 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
adjacency_index = nhgi->adj_index;
ecmp_size = nhgi->ecmp_size;
} else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
err = mlxsw_sp_adj_discard_write(mlxsw_sp,
nhgi->nh_rif->rif_index);
err = mlxsw_sp_adj_discard_write(mlxsw_sp);
if (err)
return err;
trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
......@@ -8918,6 +8953,30 @@ static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router)
kfree(router->ll_op_ctx);
}
/* Obtain a generic loopback RIF and record its index in
 * router->lb_rif_index.
 *
 * Returns 0 on success, or the error reported by
 * mlxsw_sp_router_ul_rif_get(), in which case lb_rif_index is not written.
 */
static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
{
	u16 rif_index;
	int rc;

	/* The RIF is associated with the main table (default VRF). Any
	 * table would do, but the main table exists anyway, so no
	 * resources are wasted.
	 */
	rc = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, &rif_index);
	if (!rc)
		mlxsw_sp->router->lb_rif_index = rif_index;

	return rc;
}
static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
{
mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
}
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
struct netlink_ext_ack *extack)
{
......@@ -8974,6 +9033,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_vrs_init;
err = mlxsw_sp_lb_rif_init(mlxsw_sp);
if (err)
goto err_lb_rif_init;
err = mlxsw_sp_neigh_init(mlxsw_sp);
if (err)
goto err_neigh_init;
......@@ -9039,6 +9102,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
err_mp_hash_init:
mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
mlxsw_sp_lb_rif_fini(mlxsw_sp);
err_lb_rif_init:
mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
mlxsw_sp_mr_fini(mlxsw_sp);
......@@ -9074,6 +9139,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_core_flush_owq();
WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue));
mlxsw_sp_neigh_fini(mlxsw_sp);
mlxsw_sp_lb_rif_fini(mlxsw_sp);
mlxsw_sp_vrs_fini(mlxsw_sp);
mlxsw_sp_mr_fini(mlxsw_sp);
mlxsw_sp_lpm_fini(mlxsw_sp);
......
......@@ -75,6 +75,7 @@ struct mlxsw_sp_router {
/* One set of ops for each protocol: IPv4 and IPv6 */
const struct mlxsw_sp_router_ll_ops *proto_ll_ops[MLXSW_SP_L3_PROTO_MAX];
struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx;
u16 lb_rif_index;
};
struct mlxsw_sp_fib_entry_priv {
......@@ -200,6 +201,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
u32 *p_adj_size, u32 *p_adj_hash_index);
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh);
/* Iterate over nexthops using @nh as the loop cursor: start from
 * mlxsw_sp_nexthop_next(router, NULL) and keep advancing with
 * mlxsw_sp_nexthop_next(router, nh) until it returns NULL.
 * Both arguments are evaluated on every iteration.
 */
#define mlxsw_sp_nexthop_for_each(nh, router) \
for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \
nh = mlxsw_sp_nexthop_next(router, nh))
......
......@@ -617,7 +617,7 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
TRAP_TO_CPU),
MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_EXCEPTIONS,
TRAP_TO_CPU),
MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_EXCEPTIONS,
MLXSW_SP_RXL_EXCEPTION(RTR_EGRESS0, L3_EXCEPTIONS,
TRAP_EXCEPTION_TO_CPU),
},
},
......@@ -1007,6 +1007,12 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
false),
},
},
{
.trap = MLXSW_SP_TRAP_DROP(BLACKHOLE_NEXTHOP, L3_DROPS),
.listeners_arr = {
MLXSW_SP_RXL_DISCARD(ROUTER3, L3_DISCARDS),
},
},
};
static struct mlxsw_sp_trap_policer_item *
......
......@@ -52,6 +52,7 @@ enum {
MLXSW_TRAP_ID_RTR_INGRESS1 = 0x71,
MLXSW_TRAP_ID_IPV6_PIM = 0x79,
MLXSW_TRAP_ID_IPV6_VRRP = 0x7A,
MLXSW_TRAP_ID_RTR_EGRESS0 = 0x80,
MLXSW_TRAP_ID_IPV4_BGP = 0x88,
MLXSW_TRAP_ID_IPV6_BGP = 0x89,
MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
......
......@@ -835,6 +835,7 @@ enum devlink_trap_generic_id {
DEVLINK_TRAP_GENERIC_ID_DCCP_PARSING,
DEVLINK_TRAP_GENERIC_ID_GTP_PARSING,
DEVLINK_TRAP_GENERIC_ID_ESP_PARSING,
DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP,
/* Add new generic trap IDs above */
__DEVLINK_TRAP_GENERIC_ID_MAX,
......@@ -1058,7 +1059,8 @@ enum devlink_trap_group_generic_id {
"gtp_parsing"
#define DEVLINK_TRAP_GENERIC_NAME_ESP_PARSING \
"esp_parsing"
#define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_NEXTHOP \
"blackhole_nexthop"
#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
"l2_drops"
......
......@@ -9490,6 +9490,7 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(DCCP_PARSING, DROP),
DEVLINK_TRAP(GTP_PARSING, DROP),
DEVLINK_TRAP(ESP_PARSING, DROP),
DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
......
......@@ -52,6 +52,7 @@ ALL_TESTS="
blackhole_route_test
irif_disabled_test
erif_disabled_test
blackhole_nexthop_test
"
NUM_NETIFS=4
......@@ -647,6 +648,41 @@ erif_disabled_test()
devlink_trap_action_set $trap_name "drop"
}
# Check the 'blackhole_nexthop' trap for one IP version.
#
# Arguments:
#   $1 - IP version flag ("4" or "6"), passed as -$1 to ip and $MZ
#   $2 - subnet to route via the blackhole nexthop
#   $3 - tc protocol name for the egress filter ("ip" or "ipv6")
#   $4 - destination IP of the generated packets
__blackhole_nexthop_test()
{
local flags=$1; shift
local subnet=$1; shift
local proto=$1; shift
local dip=$1; shift
local trap_name="blackhole_nexthop"
local mz_pid
RET=0
# Create blackhole nexthop 1 and route the subnet through it
ip -$flags nexthop add id 1 blackhole
ip -$flags route add $subnet nhid 1
# Software-only (skip_hw) egress filter on $rp2 with handle 101; the same
# handle is passed to the devlink_trap_drop_* helpers below
tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
flower skip_hw dst_ip $dip ip_proto udp action drop
# Generate packets to the blackhole nexthop
$MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
-B $dip -d 1msec -q &
# Remember the background generator's PID so cleanup can stop it
mz_pid=$!
# NOTE(review): helper defined elsewhere; presumably verifies the trap
# counters and that nothing hits filter 101 - confirm against the library
devlink_trap_drop_test $trap_name $rp2 101
log_test "Blackhole nexthop: IPv$flags"
devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
# Remove the route before the nexthop it references
ip -$flags route del $subnet
ip -$flags nexthop del id 1
}
# Run the blackhole nexthop trap test once for IPv4 and once for IPv6,
# sending traffic towards $h2_ipv4 and $h2_ipv6 respectively.
blackhole_nexthop_test()
{
__blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4
__blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6
}
trap cleanup EXIT
setup_prepare
......
......@@ -32,6 +32,7 @@ ALL_TESTS="
nexthop_obj_invalid_test
nexthop_obj_offload_test
nexthop_obj_group_offload_test
nexthop_obj_blackhole_offload_test
nexthop_obj_route_offload_test
devlink_reload_test
"
......@@ -693,9 +694,6 @@ nexthop_obj_invalid_test()
ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
check_fail $? "managed to configure a nexthop with MPLS encap when should not"
ip nexthop add id 1 blackhole
check_fail $? "managed to configure a blackhole nexthop when should not"
ip nexthop add id 1 dev $swp1
ip nexthop add id 2 dev $swp1
ip nexthop add id 10 group 1/2
......@@ -817,6 +815,27 @@ nexthop_obj_group_offload_test()
simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
}
# Verify that a blackhole nexthop object, and a group containing only that
# nexthop, are both reported as offloaded.
nexthop_obj_blackhole_offload_test()
{
# Test offload indication of blackhole nexthop objects
RET=0
ip nexthop add id 1 blackhole
# Poll (up to $TIMEOUT) until the nexthop shows the offload indication
busywait "$TIMEOUT" wait_for_offload \
ip nexthop show id 1
check_err $? "Blackhole nexthop not marked as offloaded when should"
# Same check for a group whose only member is the blackhole nexthop
ip nexthop add id 10 group 1
busywait "$TIMEOUT" wait_for_offload \
ip nexthop show id 10
check_err $? "Nexthop group not marked as offloaded when should"
log_test "blackhole nexthop objects offload indication"
# Delete the group before its member nexthop
ip nexthop del id 10
ip nexthop del id 1
}
nexthop_obj_route_offload_test()
{
# Test offload indication of routes using nexthop objects
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
ping_ipv4_blackhole
ping_ipv6_blackhole
"
NUM_NETIFS=8
source lib.sh
......@@ -302,6 +308,56 @@ multipath_test()
multipath6_l4_test "Weighted MP 11:45" 11 45
}
# Check IPv4 forwarding through a blackhole nexthop: ping must fail when the
# route uses the blackhole nexthop or a group containing it, and must succeed
# again once the route is pointed at nexthop 103.
ping_ipv4_blackhole()
{
RET=0
# Blackhole nexthop 1001 and group 1002 whose only member is 1001
ip nexthop add id 1001 blackhole
ip nexthop add id 1002 group 1001
ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001
ping_do $h1 198.51.100.2
check_fail $? "ping did not fail when using a blackhole nexthop"
ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002
ping_do $h1 198.51.100.2
check_fail $? "ping did not fail when using a blackhole nexthop group"
# NOTE(review): nexthop 103 is not created here; presumably it is the
# regular nexthop set up elsewhere in this script - confirm
ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103
ping_do $h1 198.51.100.2
check_err $? "ping failed with a valid nexthop"
log_test "IPv4 blackhole ping"
# The route no longer references 1001/1002, so both can be deleted
ip nexthop del id 1002
ip nexthop del id 1001
}
# Check IPv6 forwarding through a blackhole nexthop: ping must fail when the
# route uses the blackhole nexthop or a group containing it, and must succeed
# again once the route is pointed at nexthop 106.
ping_ipv6_blackhole()
{
RET=0
# IPv6 blackhole nexthop 1001 and group 1002 whose only member is 1001
ip -6 nexthop add id 1001 blackhole
ip nexthop add id 1002 group 1001
ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001
ping6_do $h1 2001:db8:2::2
check_fail $? "ping did not fail when using a blackhole nexthop"
ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002
ping6_do $h1 2001:db8:2::2
check_fail $? "ping did not fail when using a blackhole nexthop group"
# NOTE(review): nexthop 106 is not created here; presumably it is the
# regular nexthop set up elsewhere in this script - confirm
ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106
ping6_do $h1 2001:db8:2::2
check_err $? "ping failed with a valid nexthop"
log_test "IPv6 blackhole ping"
# The route no longer references 1001/1002, so both can be deleted
ip nexthop del id 1002
ip -6 nexthop del id 1001
}
setup_prepare()
{
h1=${NETIFS[p1]}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment