Commit f2e83469 authored by David S. Miller

Merge branch 'drop_monitor-Convert-to-use-devlink-tracepoint'

Ido Schimmel says:

====================
drop_monitor: Convert to use devlink tracepoint

Drop monitor is able to monitor both software and hardware originated
drops. Software drops are monitored by having drop monitor register its
probe on the 'kfree_skb' tracepoint. Hardware originated drops are
monitored by having devlink call into drop monitor whenever it receives
a dropped packet from the underlying hardware.

This patch set converts drop monitor to monitor both software and
hardware originated drops in the same way - by registering its probe on
the relevant tracepoint.

In addition to drop monitor being more consistent, it is now also
possible to build drop monitor as a module instead of as a builtin and
still monitor hardware originated drops. Initially, CONFIG_NET_DEVLINK
implied CONFIG_NET_DROP_MONITOR, but since commit def2fbff
("kconfig: allow symbols implied by y to become m") it is possible to have
CONFIG_NET_DEVLINK=y and CONFIG_NET_DROP_MONITOR=m, in which case hardware
originated drops were not monitored before this patch set.

Patch set overview:

Patch #1 adds a tracepoint in devlink for trap reports.

Patch #2 prepares probe functions in drop monitor for the new
tracepoint.

Patch #3 converts drop monitor to use the new tracepoint.

Patches #4-#6 perform cleanups after the conversion.

Patch #7 adds a test case for drop monitor. Both software originated
drops and hardware originated drops (using netdevsim) are tested.

Tested:

| CONFIG_NET_DEVLINK | CONFIG_NET_DROP_MONITOR | Build | SW drops | HW drops |
| -------------------|-------------------------|-------|----------|----------|
|          y         |            y            |   v   |     v    |     v    |
|          y         |            m            |   v   |     v    |     v    |
|          y         |            n            |   v   |     x    |     x    |
|          n         |            y            |   v   |     v    |     x    |
|          n         |            m            |   v   |     v    |     x    |
|          n         |            n            |   v   |     x    |     x    |
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8333c1c4 b7cc6d3c
......@@ -12065,7 +12065,6 @@ M: Neil Horman <nhorman@tuxdriver.com>
L: netdev@vger.kernel.org
S: Maintained
W: https://fedorahosted.org/dropwatch/
F: include/net/drop_monitor.h
F: include/uapi/linux/net_dropmon.h
F: net/core/drop_monitor.c
......
......@@ -624,6 +624,22 @@ struct devlink_health_reporter_ops {
struct netlink_ext_ack *extack);
};
/**
* struct devlink_trap_metadata - Packet trap metadata.
* @trap_name: Trap name.
* @trap_group_name: Trap group name.
* @input_dev: Input netdevice. May be NULL: devlink_trap_report_metadata_set()
*             only assigns it when the ingress port type is
*             DEVLINK_PORT_TYPE_ETH, and the devlink_trap_report tracepoint
*             records the string "NULL" as the device name in that case.
* @fa_cookie: Flow action user cookie.
* @trap_type: Trap type.
*
* Filled in by devlink_trap_report_metadata_set() and passed by const pointer
* to the devlink_trap_report tracepoint.
*/
struct devlink_trap_metadata {
const char *trap_name;
const char *trap_group_name;
struct net_device *input_dev;
const struct flow_action_cookie *fa_cookie;
enum devlink_trap_type trap_type;
};
/**
* struct devlink_trap_policer - Immutable packet trap policer attributes.
* @id: Policer identifier.
......
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _NET_DROP_MONITOR_H_
#define _NET_DROP_MONITOR_H_
#include <linux/ktime.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/flow_offload.h>
/**
* struct net_dm_hw_metadata - Hardware-supplied packet metadata.
* @trap_group_name: Hardware trap group name.
* @trap_name: Hardware trap name.
* @input_dev: Input netdevice.
* @fa_cookie: Flow action user cookie.
*
* Passed by const pointer to net_dm_hw_report() together with the skb the
* metadata describes.
*/
struct net_dm_hw_metadata {
const char *trap_group_name;
const char *trap_name;
struct net_device *input_dev;
const struct flow_action_cookie *fa_cookie;
};
/*
 * net_dm_hw_report() - hand an skb and its hardware metadata to drop monitor.
 *
 * The real implementation is only declared when CONFIG_NET_DROP_MONITOR is
 * reachable from the including code. Otherwise callers get the empty inline
 * stub below, so the report is silently discarded.
 */
#if IS_REACHABLE(CONFIG_NET_DROP_MONITOR)
void net_dm_hw_report(struct sk_buff *skb,
const struct net_dm_hw_metadata *hw_metadata);
#else
/* No-op stub: drop monitor is not reachable, nothing to report to. */
static inline void
net_dm_hw_report(struct sk_buff *skb,
const struct net_dm_hw_metadata *hw_metadata)
{
}
#endif
#endif /* _NET_DROP_MONITOR_H_ */
......@@ -171,6 +171,43 @@ TRACE_EVENT(devlink_health_reporter_state_update,
__entry->new_state)
);
/*
* Tracepoint for devlink packet trap:
*
* Records the bus, device and driver names of the devlink instance, the trap
* and trap group names taken from @metadata, and the name of the input
* netdevice. When metadata->input_dev is NULL the literal string "NULL" is
* recorded as the input device name.
*
* @skb is part of the tracepoint prototype (and therefore available to
* registered probes) but none of its fields are stored in the trace entry.
*
* NOTE(review): input_dev_name is declared with __dynamic_array() of
* IFNAMSIZ bytes but written with __assign_str() and read with __get_str();
* confirm this pairing is intended rather than a fixed-size __array().
*/
TRACE_EVENT(devlink_trap_report,
TP_PROTO(const struct devlink *devlink, struct sk_buff *skb,
const struct devlink_trap_metadata *metadata),
TP_ARGS(devlink, skb, metadata),
TP_STRUCT__entry(
__string(bus_name, devlink->dev->bus->name)
__string(dev_name, dev_name(devlink->dev))
__string(driver_name, devlink->dev->driver->name)
__string(trap_name, metadata->trap_name)
__string(trap_group_name, metadata->trap_group_name)
__dynamic_array(char, input_dev_name, IFNAMSIZ)
),
TP_fast_assign(
struct net_device *input_dev = metadata->input_dev;
__assign_str(bus_name, devlink->dev->bus->name);
__assign_str(dev_name, dev_name(devlink->dev));
__assign_str(driver_name, devlink->dev->driver->name);
__assign_str(trap_name, metadata->trap_name);
__assign_str(trap_group_name, metadata->trap_group_name);
__assign_str(input_dev_name,
(input_dev ? input_dev->name : "NULL"));
),
TP_printk("bus_name=%s dev_name=%s driver_name=%s trap_name=%s "
"trap_group_name=%s input_dev_name=%s", __get_str(bus_name),
__get_str(dev_name), __get_str(driver_name),
__get_str(trap_name), __get_str(trap_group_name),
__get_str(input_dev_name))
);
#endif /* _TRACE_DEVLINK_H */
/* This part must be outside protection */
......
......@@ -434,7 +434,6 @@ config NET_SOCK_MSG
config NET_DEVLINK
bool
default n
imply NET_DROP_MONITOR
config PAGE_POOL
bool
......
......@@ -27,7 +27,6 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
#include <net/drop_monitor.h>
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
......@@ -84,6 +83,7 @@ EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
......@@ -9261,20 +9261,19 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
}
/*
 * Populate the trap metadata that devlink_trap_report() passes to the
 * devlink_trap_report tracepoint.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so the removed
 * devlink_trap_report_metadata_fill()/hw_metadata lines and the added
 * devlink_trap_report_metadata_set()/metadata lines appear interleaved.
 *
 * The trap name, trap group name, flow action cookie and (in the new
 * version) trap type are copied from the trap item. The input netdevice is
 * only assigned when the ingress port type is DEVLINK_PORT_TYPE_ETH; the
 * port type is read under the port's type_lock.
 */
static void
devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata,
const struct devlink_trap_item *trap_item,
struct devlink_port *in_devlink_port,
const struct flow_action_cookie *fa_cookie)
devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
const struct devlink_trap_item *trap_item,
struct devlink_port *in_devlink_port,
const struct flow_action_cookie *fa_cookie)
{
struct devlink_trap_group_item *group_item = trap_item->group_item;
hw_metadata->trap_group_name = group_item->group->name;
hw_metadata->trap_name = trap_item->trap->name;
hw_metadata->fa_cookie = fa_cookie;
metadata->trap_name = trap_item->trap->name;
metadata->trap_group_name = trap_item->group_item->group->name;
metadata->fa_cookie = fa_cookie;
metadata->trap_type = trap_item->trap->type;
spin_lock(&in_devlink_port->type_lock);
if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
hw_metadata->input_dev = in_devlink_port->type_dev;
metadata->input_dev = in_devlink_port->type_dev;
spin_unlock(&in_devlink_port->type_lock);
}
......@@ -9292,21 +9291,17 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
{
struct devlink_trap_item *trap_item = trap_ctx;
struct net_dm_hw_metadata hw_metadata = {};
devlink_trap_stats_update(trap_item->stats, skb->len);
devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
/* Control packets were not dropped by the device or encountered an
* exception during forwarding and therefore should not be reported to
* the kernel's drop monitor.
*/
if (trap_item->trap->type == DEVLINK_TRAP_TYPE_CONTROL)
return;
if (trace_devlink_trap_report_enabled()) {
struct devlink_trap_metadata metadata = {};
devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
in_devlink_port, fa_cookie);
net_dm_hw_report(skb, &hw_metadata);
devlink_trap_report_metadata_set(&metadata, trap_item,
in_devlink_port, fa_cookie);
trace_devlink_trap_report(devlink, skb, &metadata);
}
}
EXPORT_SYMBOL_GPL(devlink_trap_report);
......
This diff is collapsed.
......@@ -19,6 +19,7 @@ TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
......
......@@ -30,3 +30,6 @@ CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
CONFIG_TRACEPOINTS=y
CONFIG_NET_DROP_MONITOR=m
CONFIG_NETDEVSIM=m
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# This test is for checking drop monitor functionality.
# Overall exit status of the script; set to 1 by log_test() on any failure.
ret=0
# Pass/fail counters updated by log_test() and printed in the final summary.
# Initialized explicitly so that values inherited from the environment cannot
# skew the totals.
nsuccess=0
nfail=0
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
# all tests in this script. Can be overridden with -t option
TESTS="
sw_drops
hw_drops
"
# Command prefixes that run the respective tool inside the ns1 namespace.
IP="ip -netns ns1"
TC="tc -netns ns1"
DEVLINK="devlink -N ns1"
NS_EXEC="ip netns exec ns1"
# netdevsim bus directory in sysfs and the identity of the test device.
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
# Record and print the outcome of a single check.
#   $1 - actual return code
#   $2 - expected return code
#   $3 - human readable description of the check
# On a mismatch the global exit status 'ret' is set to 1 and 'nfail' is
# incremented; otherwise 'nsuccess' is incremented.
log_test()
{
	local actual=$1
	local want=$2
	local desc="$3"

	if ! [ ${actual} -eq ${want} ]; then
		ret=1
		nfail=$((nfail+1))
		printf " TEST: %-60s [FAIL]\n" "${desc}"
		return
	fi

	printf " TEST: %-60s [ OK ]\n" "${desc}"
	nsuccess=$((nsuccess+1))
}
# Prepare the test environment: namespace ns1 containing a dummy device
# (dummy10) and a netdevsim device whose netdev is brought up.
setup()
{
	# Runs before 'set -e' with all output discarded, so a modprobe
	# failure is ignored.
	modprobe netdevsim > /dev/null 2>&1

	# From here on, any failing preparation step aborts the script.
	set -e
	ip netns add ns1
	${IP} link add dummy10 up type dummy

	# Instantiate the netdevsim device by writing "<DEV_ADDR> 1" to the
	# bus' new_device file, then wait for udev to finish processing it.
	${NS_EXEC} echo "${DEV_ADDR} 1" > "${NETDEVSIM_PATH}/new_device"
	udevadm settle

	local nsim_netdev=$(${NS_EXEC} ls "${NETDEVSIM_PATH}/devices/${DEV}/net/")
	${IP} link set dev ${nsim_netdev} up
	set +e
}
# Undo setup(): remove the netdevsim device and delete namespace ns1.
cleanup()
{
	${NS_EXEC} echo "${DEV_ADDR}" > "${NETDEVSIM_PATH}/del_device"
	ip netns del ns1
}
# Software drops: traffic sent out of dummy10 is dropped by a tc flower
# filter. While the traffic generator runs, dwdump is expected to capture
# matching packets; once it is stopped, no matching packets are expected.
sw_drops_test()
{
	printf '\n'
	echo "Software drops test"

	setup

	local workdir=$(mktemp -d)
	local pcap=${workdir}/packets.pcap
	local filter='ip.dst == 192.0.2.10'
	local matches

	${TC} qdisc add dev dummy10 clsact
	${TC} filter add dev dummy10 egress pref 1 handle 101 proto ip \
		flower dst_ip 192.0.2.10 action drop

	# Background traffic generator; it is stopped via job spec %% below.
	${NS_EXEC} mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \
		-A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \
		-d 100msec &

	timeout 5 dwdump -o sw -w ${pcap}
	matches=$(tshark -r ${pcap} -Y "${filter}" 2> /dev/null | wc -l)
	[ ${matches} -ne 0 ]
	log_test $? 0 "Capturing active software drops"

	rm ${pcap}

	# Stop the generator and repeat the capture: nothing should match now.
	{ kill %% && wait %%; } 2>/dev/null
	timeout 5 dwdump -o sw -w ${pcap}
	matches=$(tshark -r ${pcap} -Y "${filter}" 2> /dev/null | wc -l)
	[ ${matches} -eq 0 ]
	log_test $? 0 "Capturing inactive software drops"

	rm -r $workdir

	cleanup
}
# Hardware drops: with the blackhole_route trap action set to "trap", dwdump
# is expected to capture packets carrying that trap name; after switching the
# action back to "drop", no such packets are expected.
hw_drops_test()
{
	printf '\n'
	echo "Hardware drops test"

	setup

	local workdir=$(mktemp -d)
	local pcap=${workdir}/packets.pcap
	local filter='net_dm.hw_trap_name== blackhole_route'
	local matches

	${DEVLINK} trap set ${DEVLINK_DEV} trap blackhole_route action trap
	timeout 5 dwdump -o hw -w ${pcap}
	matches=$(tshark -r ${pcap} -Y "${filter}" 2> /dev/null | wc -l)
	[ ${matches} -ne 0 ]
	log_test $? 0 "Capturing active hardware drops"

	rm ${pcap}

	${DEVLINK} trap set ${DEVLINK_DEV} trap blackhole_route action drop
	timeout 5 dwdump -o hw -w ${pcap}
	matches=$(tshark -r ${pcap} -Y "${filter}" 2> /dev/null | wc -l)
	[ ${matches} -eq 0 ]
	log_test $? 0 "Capturing inactive hardware drops"

	rm -r $workdir

	cleanup
}
################################################################################
# usage
# Print the command line help to stdout. The script name is derived from $0
# with any leading directory components stripped, and the current value of
# $TESTS is listed as the available options.
usage()
{
# Unquoted heredoc delimiter: ${0##*/} and $TESTS are expanded when printed.
cat <<EOF
usage: ${0##*/} OPTS
-t <test> Test(s) to run (default: all)
(options: $TESTS)
EOF
}
################################################################################
# main
# Parse command line options:
#   -t <tests>  override the list of tests to run
#   -h          print usage and exit successfully
# Anything else (including a missing argument to -t) prints usage and exits
# with status 1.
while getopts ":t:h" opt
do
	case "${opt}" in
	h)
		usage
		exit 0
		;;
	t)
		TESTS=$OPTARG
		;;
	*)
		usage
		exit 1
		;;
	esac
done
# Prerequisite checks. Each failed check prints a SKIP message and exits with
# the kselftest skip code.
if [ "$(id -u)" -ne 0 ]; then
	echo "SKIP: Need root privileges"
	exit $ksft_skip
fi

# Every external tool used by the tests must resolve to an executable.
for tool in ip devlink tshark dwdump udevadm timeout mausezahn; do
	if [ ! -x "$(command -v ${tool})" ]; then
		echo "SKIP: Could not run test without ${tool} tool"
		exit $ksft_skip
	fi
done

# The tests filter on net_dm fields, so tshark must know the net_dm dissector.
if ! tshark -G fields 2> /dev/null | grep -q net_dm; then
	echo "SKIP: tshark too old, missing net_dm dissector"
	exit $ksft_skip
fi
# start clean
cleanup &> /dev/null

# Run the requested tests in the order given.
for t in $TESTS
do
	case $t in
	sw_drops|sw)	sw_drops_test;;
	hw_drops|hw)	hw_drops_test;;
	# "none" is the documented way to run nothing (see the summary below).
	none)		;;
	# $TESTS has been overridden by -t when this is reached, so list the
	# available test names explicitly instead of echoing "help" back.
	help)		echo "Test names: sw_drops hw_drops"; exit 0;;
	# A misspelled test name must not result in a silent, empty pass.
	*)
		echo "Unknown test: $t" >&2
		ret=1
		;;
	esac
done

if [ "$TESTS" != "none" ]; then
	# Default to 0 so the summary is well-formed even if no test ran.
	printf "\nTests passed: %3d\n" "${nsuccess:-0}"
	printf "Tests failed: %3d\n" "${nfail:-0}"
fi

exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment