Commit 7df5e3db authored by Peter Oskolkov, committed by Alexei Starovoitov

selftests: bpf: tc-bpf flow shaping with EDT

Add a small test that shows how to shape a TCP flow in tc-bpf
with EDT and ECN.
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 315a2029
...@@ -53,7 +53,8 @@ TEST_PROGS := test_kmod.sh \ ...@@ -53,7 +53,8 @@ TEST_PROGS := test_kmod.sh \
test_xdp_vlan.sh \ test_xdp_vlan.sh \
test_lwt_ip_encap.sh \ test_lwt_ip_encap.sh \
test_tcp_check_syncookie.sh \ test_tcp_check_syncookie.sh \
test_tc_tunnel.sh test_tc_tunnel.sh \
test_tc_edt.sh
TEST_PROGS_EXTENDED := with_addr.sh \ TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \ with_tunnels.sh \
......
// SPDX-License-Identifier: GPL-2.0
#include <stdint.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
/* Shaping parameters; every *_NS value is a duration in nanoseconds. */
#define NS_PER_SEC		(1000 * 1000 * 1000)
/* the maximum delay we are willing to add (drop packets beyond that) */
#define TIME_HORIZON_NS		(2 * NS_PER_SEC)
/* packets that would be delayed at least this long get the ECN CE mark */
#define ECN_HORIZON_NS		(5 * 1000 * 1000)
/* target rate of the shaped flow, in bytes per second */
#define THROTTLE_RATE_BPS	5000000
/*
 * flow_key => last_tstamp: the departure timestamp most recently assigned
 * to a packet of the flow.
 *
 * The map holds a single entry; throttle_flow() always uses key 0, so all
 * shaped traffic shares one pacing state.
 */
struct bpf_map_def SEC("maps") flow_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(uint32_t),
.value_size = sizeof(uint64_t),
.max_entries = 1,
};
/*
 * Pace the flow by giving each packet an earliest departure time (EDT).
 *
 * The single flow_map entry (key 0) remembers the departure time handed to
 * the previous packet.  The next packet may leave no earlier than that time
 * plus the delay its own length costs at THROTTLE_RATE_BPS.
 *
 * Returns TC_ACT_OK when the packet may proceed (skb->tstamp is moved into
 * the future if the packet has to wait) and TC_ACT_SHOT when it is dropped:
 * either the map update failed or the required delay reaches
 * TIME_HORIZON_NS.
 */
static inline int throttle_flow(struct __sk_buff *skb)
{
	int key = 0;
	uint64_t *prev_edt = bpf_map_lookup_elem(&flow_map, &key);
	/* delay this packet's length accounts for at the target rate */
	uint64_t pkt_ns = ((uint64_t)skb->len) * NS_PER_SEC /
			  THROTTLE_RATE_BPS;
	uint64_t now = bpf_ktime_get_ns();
	uint64_t edt = 0;
	uint64_t earliest;
	uint64_t wait_ns;

	/* without a previous entry edt stays 0 and the packet is not delayed */
	if (prev_edt)
		edt = *prev_edt + pkt_ns;

	/* the packet cannot leave before "now" or before its current tstamp */
	earliest = skb->tstamp;
	if (now > earliest)
		earliest = now;

	/* on schedule or ahead of it: record the time and pass unchanged */
	if (edt <= earliest)
		return bpf_map_update_elem(&flow_map, &key, &earliest,
					   BPF_ANY) ? TC_ACT_SHOT : TC_ACT_OK;

	/* edt > earliest >= now here, so the subtraction cannot wrap */
	wait_ns = edt - now;

	/* do not queue past the time horizon */
	if (wait_ns >= TIME_HORIZON_NS)
		return TC_ACT_SHOT;

	/* signal congestion once the backlog gets long enough */
	if (wait_ns >= ECN_HORIZON_NS)
		bpf_skb_ecn_set_ce(skb);

	/* the entry was found above, so only an existing entry is updated */
	if (bpf_map_update_elem(&flow_map, &key, &edt, BPF_EXIST))
		return TC_ACT_SHOT;

	skb->tstamp = edt;
	return TC_ACT_OK;
}
/*
 * Inspect a TCP header and shape the packet if it is addressed to
 * destination port 9000.
 *
 * Returns TC_ACT_SHOT when the TCP header does not fit inside the packet
 * data, the result of throttle_flow() for port 9000, and TC_ACT_OK for all
 * other TCP traffic.
 */
static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
{
	void *pkt_end = (void *)(long)skb->data_end;

	/* drop malformed packets: the whole header must be within bounds */
	if ((void *)(tcp + 1) > pkt_end)
		return TC_ACT_SHOT;

	/* traffic to any other port passes through unshaped */
	if (tcp->dest != bpf_htons(9000))
		return TC_ACT_OK;

	return throttle_flow(skb);
}
static inline int handle_ipv4(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
struct iphdr *iph;
uint32_t ihl;
/* drop malformed packets */
if (data + sizeof(struct ethhdr) > data_end)
return TC_ACT_SHOT;
iph = (struct iphdr *)(data + sizeof(struct ethhdr));
if ((void *)(iph + 1) > data_end)
return TC_ACT_SHOT;
ihl = iph->ihl * 4;
if (((void *)iph) + ihl > data_end)
return TC_ACT_SHOT;
if (iph->protocol == IPPROTO_TCP)
return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl));
return TC_ACT_OK;
}
/*
 * Classifier entry point (section "cls_test").
 *
 * IPv4 packets are passed to handle_ipv4(); everything else is accepted
 * untouched with TC_ACT_OK.
 */
SEC("cls_test") int tc_prog(struct __sk_buff *skb)
{
	if (skb->protocol != bpf_htons(ETH_P_IP))
		return TC_ACT_OK;

	return handle_ipv4(skb);
}
/* License string for the program, emitted via SEC("license"). */
char __license[] SEC("license") = "GPL";
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test installs a TC bpf program that throttles a TCP flow
# with dst port = 9000 down to 5MBps. Then it measures actual
# throughput of the flow.
# The test creates network namespaces and attaches tc filters, so it
# requires root.
if [[ $EUID -ne 0 ]]; then
	echo "This script must be run as root"
	echo "FAIL"
	exit 1
fi

# check that nc, dd, and timeout are present; name the tool that is
# actually missing
for tool in nc dd timeout; do
	command -v "${tool}" >/dev/null 2>&1 || \
		{ echo >&2 "${tool} is not available"; exit 1; }
done
# Namespace names get a random suffix (mktemp -u only generates a name)
# to avoid clashing with existing namespaces.
readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
# Addresses assigned to the two veth ends: sender and receiver.
readonly IP_SRC="172.16.1.100"
readonly IP_DST="172.16.2.100"
# Delete both test namespaces.
#
# Installed as the EXIT trap, so it also runs when the script aborts part
# way through setup.  The script runs with "set -e"; "|| true" keeps a
# failed deletion of one namespace from ending the cleanup before the
# other one has been attempted.
cleanup()
{
	ip netns del "${NS_SRC}" || true
	ip netns del "${NS_DST}" || true
}
# Always remove the namespaces on exit, including on failure.
trap cleanup EXIT
set -e # exit on error
# Create the sender and receiver namespaces.
ip netns add "${NS_SRC}"
ip netns add "${NS_DST}"
# Create a veth pair and move one end into each namespace.
ip link add veth_src type veth peer name veth_dst
ip link set veth_src netns ${NS_SRC}
ip link set veth_dst netns ${NS_DST}
# Address both ends and bring them up.
ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src
ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst
ip -netns ${NS_SRC} link set dev veth_src up
ip -netns ${NS_DST} link set dev veth_dst up
# The two addresses are in different /24 networks, so add explicit host
# routes to the peer in each direction.
ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src
ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst
# set up TC on TX: fq as the root qdisc, plus a clsact qdisc carrying the
# BPF program (section cls_test of test_tc_edt.o) on egress
ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
bpf da obj test_tc_edt.o sec cls_test
# start the listener
# nc is put in the background by the shell running inside the namespace,
# so this script has no background job of its own and "$!" carries no PID
# here; nothing is recorded.
ip netns exec ${NS_DST} bash -c \
"nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
# presumably gives nc time to start listening before the load begins
sleep 1
# Duration of the load in seconds, and the rate in bytes per second that
# the measured throughput is compared against.
declare -ir TIMEOUT=20
declare -ir EXPECTED_BPS=5000000
# run the load, capture RX bytes on DST
# Snapshot the receiver's byte counter before the transfer.
declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
cat /sys/class/net/veth_dst/statistics/rx_bytes )
# dd is cut short by "timeout", which exits non-zero; suspend "exit on
# error" around it.
set +e
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
set -e
# Snapshot the counter again after the transfer.
declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
cat /sys/class/net/veth_dst/statistics/rx_bytes )
# Average rate; this assumes the transfer lasted the full TIMEOUT seconds.
declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
# Report the elapsed time and the relative deviation from the expected rate.
echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
$1, ($2-$3)*100.0/$3}'
# Verdict: the test passes when the measured rate deviates from the
# expected rate by no more than 1%.  The deviation is usually about 0.1%
# on a 20-sec test and tends towards zero the longer the test runs.
# RES is "1" when the deviation exceeds 1% and "0" otherwise.
declare -ir RES=$( awk -v actual="${ACTUAL_BPS}" -v expected="${EXPECTED_BPS}" '
	BEGIN {
		pct = (actual - expected) * 100.0 / expected
		if (pct < 0.0)
			pct = -pct
		print ((pct > 1.0) ? "1" : "0")
	}' )
if [[ "${RES}" != "0" ]]; then
	echo "FAIL"
	exit 1
fi
echo "PASS"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment