Commit 7f3249fb authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-xdp-adjust-tail'

Nikita V. Shirokov says:

====================
In this patch series I add a new bpf helper which allows manipulating
xdp's data_end pointer. Right now only "shrinking" (reducing the packet's
size by moving the pointer) is supported (and I see no use case for "growing").
The main use case for such a helper is to be able to generate control (ICMP)
messages from XDP context. Such messages usually contain the first N bytes
of the original packet as a payload, and this is exactly what this helper
allows us to do (see patch 3 for a sample program, where we generate an
ICMP "packet too big" message). This helper could be useful for load
balancing applications where, after additional encapsulation, the resulting
packet could be bigger than the interface MTU.
Aside from the new helper, this patch series contains minor changes in device
drivers (for the ones which require them), so that they recalculate the
packet's length not only when the head pointer was adjusted, but also when
the tail pointer was.

v2->v3:
 * adding missed "signed off by" in v2

v1->v2:
 * fixed kbuild warning
 * made offset eq 0 invalid for xdp_bpf_adjust_tail
 * split the bpf_prog_test_run fix and the selftests into separate commits
 * added SPDX license identifiers where applicable
 * some reshuffling in patches order (tests now in the end)
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 0c90f224 c6ffd1ff
...@@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, ...@@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
if (tx_avail != bp->tx_ring_size) if (tx_avail != bp->tx_ring_size)
*event &= ~BNXT_RX_EVENT; *event &= ~BNXT_RX_EVENT;
*len = xdp.data_end - xdp.data;
if (orig_data != xdp.data) { if (orig_data != xdp.data) {
offset = xdp.data - xdp.data_hard_start; offset = xdp.data - xdp.data_hard_start;
*data_ptr = xdp.data_hard_start + offset; *data_ptr = xdp.data_hard_start + offset;
*len = xdp.data_end - xdp.data;
} }
switch (act) { switch (act) {
case XDP_PASS: case XDP_PASS:
......
...@@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, ...@@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
action = bpf_prog_run_xdp(prog, &xdp); action = bpf_prog_run_xdp(prog, &xdp);
rcu_read_unlock(); rcu_read_unlock();
len = xdp.data_end - xdp.data;
/* Check if XDP program has changed headers */ /* Check if XDP program has changed headers */
if (orig_data != xdp.data) { if (orig_data != xdp.data) {
len = xdp.data_end - xdp.data;
offset = orig_data - xdp.data; offset = orig_data - xdp.data;
dma_addr -= offset; dma_addr -= offset;
} }
......
...@@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ...@@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
act = bpf_prog_run_xdp(xdp_prog, &xdp); act = bpf_prog_run_xdp(xdp_prog, &xdp);
length = xdp.data_end - xdp.data;
if (xdp.data != orig_data) { if (xdp.data != orig_data) {
length = xdp.data_end - xdp.data;
frags[0].page_offset = xdp.data - frags[0].page_offset = xdp.data -
xdp.data_hard_start; xdp.data_hard_start;
va = xdp.data; va = xdp.data;
......
...@@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) ...@@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
act = bpf_prog_run_xdp(xdp_prog, &xdp); act = bpf_prog_run_xdp(xdp_prog, &xdp);
pkt_len -= xdp.data - orig_data; pkt_len = xdp.data_end - xdp.data;
pkt_off += xdp.data - orig_data; pkt_off += xdp.data - orig_data;
switch (act) { switch (act) {
......
...@@ -1696,6 +1696,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, ...@@ -1696,6 +1696,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
return NULL; return NULL;
case XDP_PASS: case XDP_PASS:
delta = orig_data - xdp.data; delta = orig_data - xdp.data;
len = xdp.data_end - xdp.data;
break; break;
default: default:
bpf_warn_invalid_xdp_action(act); bpf_warn_invalid_xdp_action(act);
...@@ -1716,7 +1717,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, ...@@ -1716,7 +1717,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
} }
skb_reserve(skb, pad - delta); skb_reserve(skb, pad - delta);
skb_put(skb, len + delta); skb_put(skb, len);
get_page(alloc_frag->page); get_page(alloc_frag->page);
alloc_frag->offset += buflen; alloc_frag->offset += buflen;
......
...@@ -606,6 +606,7 @@ static struct sk_buff *receive_small(struct net_device *dev, ...@@ -606,6 +606,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
case XDP_PASS: case XDP_PASS:
/* Recalculate length in case bpf program changed it */ /* Recalculate length in case bpf program changed it */
delta = orig_data - xdp.data; delta = orig_data - xdp.data;
len = xdp.data_end - xdp.data;
break; break;
case XDP_TX: case XDP_TX:
xdpf = convert_to_xdp_frame(&xdp); xdpf = convert_to_xdp_frame(&xdp);
...@@ -642,7 +643,7 @@ static struct sk_buff *receive_small(struct net_device *dev, ...@@ -642,7 +643,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
goto err; goto err;
} }
skb_reserve(skb, headroom - delta); skb_reserve(skb, headroom - delta);
skb_put(skb, len + delta); skb_put(skb, len);
if (!delta) { if (!delta) {
buf += header_offset; buf += header_offset;
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
...@@ -757,6 +758,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, ...@@ -757,6 +758,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
offset = xdp.data - offset = xdp.data -
page_address(xdp_page) - vi->hdr_len; page_address(xdp_page) - vi->hdr_len;
/* recalculate len if xdp.data or xdp.data_end were
* adjusted
*/
len = xdp.data_end - xdp.data;
/* We can only create skb based on xdp_page. */ /* We can only create skb based on xdp_page. */
if (unlikely(xdp_page != page)) { if (unlikely(xdp_page != page)) {
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -755,6 +755,13 @@ union bpf_attr { ...@@ -755,6 +755,13 @@ union bpf_attr {
* @addr: pointer to struct sockaddr to bind socket to * @addr: pointer to struct sockaddr to bind socket to
* @addr_len: length of sockaddr structure * @addr_len: length of sockaddr structure
* Return: 0 on success or negative error code * Return: 0 on success or negative error code
*
* int bpf_xdp_adjust_tail(xdp_md, delta)
* Adjust the xdp_md.data_end by delta. Only shrinking of packet's
* size is supported.
* @xdp_md: pointer to xdp_md
* @delta: A negative integer to be added to xdp_md.data_end
* Return: 0 on success or negative on error
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -821,7 +828,8 @@ union bpf_attr { ...@@ -821,7 +828,8 @@ union bpf_attr {
FN(msg_apply_bytes), \ FN(msg_apply_bytes), \
FN(msg_cork_bytes), \ FN(msg_cork_bytes), \
FN(msg_pull_data), \ FN(msg_pull_data), \
FN(bind), FN(bind), \
FN(xdp_adjust_tail),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, ...@@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
xdp.rxq = &rxqueue->xdp_rxq; xdp.rxq = &rxqueue->xdp_rxq;
retval = bpf_test_run(prog, &xdp, repeat, &duration); retval = bpf_test_run(prog, &xdp, repeat, &duration);
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN) if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
xdp.data_end != xdp.data + size)
size = xdp.data_end - xdp.data; size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
kfree(data); kfree(data);
......
...@@ -3996,9 +3996,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, ...@@ -3996,9 +3996,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog) struct bpf_prog *xdp_prog)
{ {
struct netdev_rx_queue *rxqueue; struct netdev_rx_queue *rxqueue;
void *orig_data, *orig_data_end;
u32 metalen, act = XDP_DROP; u32 metalen, act = XDP_DROP;
struct xdp_buff xdp; struct xdp_buff xdp;
void *orig_data;
int hlen, off; int hlen, off;
u32 mac_len; u32 mac_len;
...@@ -4037,6 +4037,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, ...@@ -4037,6 +4037,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp.data_meta = xdp.data; xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen; xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb); xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data_end = xdp.data_end;
orig_data = xdp.data; orig_data = xdp.data;
rxqueue = netif_get_rxqueue(skb); rxqueue = netif_get_rxqueue(skb);
...@@ -4051,6 +4052,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, ...@@ -4051,6 +4052,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
__skb_push(skb, -off); __skb_push(skb, -off);
skb->mac_header += off; skb->mac_header += off;
/* check if bpf_xdp_adjust_tail was used. it can only "shrink"
* pckt.
*/
off = orig_data_end - xdp.data_end;
if (off != 0)
skb_set_tail_pointer(skb, xdp.data_end - xdp.data);
switch (act) { switch (act) {
case XDP_REDIRECT: case XDP_REDIRECT:
case XDP_TX: case XDP_TX:
......
...@@ -2725,6 +2725,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { ...@@ -2725,6 +2725,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING,
}; };
/* bpf_xdp_adjust_tail - move xdp->data_end by @offset bytes.
 *
 * Only a strictly negative @offset (shrinking) is accepted, and the packet
 * may not be shrunk below ETH_HLEN bytes. Returns 0 on success, -EINVAL
 * otherwise; on failure xdp->data_end is left untouched.
 */
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
	void *new_end;

	/* only shrinking is allowed for now; zero is rejected as well */
	if (unlikely(offset >= 0))
		return -EINVAL;

	new_end = xdp->data_end + offset;

	/* never cut into the Ethernet header */
	if (unlikely(new_end < xdp->data + ETH_HLEN))
		return -EINVAL;

	xdp->data_end = new_end;
	return 0;
}
/* Prototype descriptor for the bpf_xdp_adjust_tail helper: it is not
 * restricted to GPL programs, returns an integer, takes the program
 * context as its first argument and an unconstrained value as its second.
 */
static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
.func = bpf_xdp_adjust_tail,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
{ {
void *meta = xdp->data_meta + offset; void *meta = xdp->data_meta + offset;
...@@ -3074,7 +3098,8 @@ bool bpf_helper_changes_pkt_data(void *func) ...@@ -3074,7 +3098,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l4_csum_replace || func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head || func == bpf_xdp_adjust_head ||
func == bpf_xdp_adjust_meta || func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data) func == bpf_msg_pull_data ||
func == bpf_xdp_adjust_tail)
return true; return true;
return false; return false;
...@@ -3888,6 +3913,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -3888,6 +3913,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_proto; return &bpf_xdp_redirect_proto;
case BPF_FUNC_redirect_map: case BPF_FUNC_redirect_map:
return &bpf_xdp_redirect_map_proto; return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
} }
......
...@@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor ...@@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp hostprogs-y += syscall_tp
hostprogs-y += cpustat hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
# Libbpf dependencies # Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
...@@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o ...@@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
# Tell kbuild to always build the programs # Tell kbuild to always build the programs
always := $(hostprogs-y) always := $(hostprogs-y)
...@@ -148,6 +150,7 @@ always += xdp_rxq_info_kern.o ...@@ -148,6 +150,7 @@ always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o always += syscall_tp_kern.o
always += cpustat_kern.o always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/ HOSTCFLAGS += -I$(srctree)/tools/lib/
...@@ -193,6 +196,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf ...@@ -193,6 +196,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf HOSTLOADLIBES_syscall_tp += -lelf
HOSTLOADLIBES_cpustat += -lelf HOSTLOADLIBES_cpustat += -lelf
HOSTLOADLIBES_xdp_adjust_tail += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
......
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program shows how to use bpf_xdp_adjust_tail() by generating an
* ICMPv4 "packet too big" message (to be more precise in the case of v4:
* destination unreachable / fragmentation needed with the DF bit set)
* when receiving packets bigger than 600 bytes.
*/
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include "bpf_helpers.h"
/* TTL written into the IPv4 header of the generated ICMP message. */
#define DEFAULT_TTL 64
/* Frames longer than this many bytes trigger an ICMP reply. */
#define MAX_PCKT_SIZE 600
/* Number of bytes of the offending frame that are kept (the frame is
 * truncated to this length before the new headers are prepended).
 */
#define ICMP_TOOBIG_SIZE 98
/* Number of bytes, starting at the ICMP header, covered by the ICMP
 * checksum.
 */
#define ICMP_TOOBIG_PAYLOAD_SIZE 92
/* One-element array map (32-bit key, 64-bit value) holding the counter
 * that count_icmp() increments.
 */
struct bpf_map_def SEC("maps") icmpcnt = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = 1,
};
static __always_inline void count_icmp(void)
{
u64 key = 0;
u64 *icmp_count;
icmp_count = bpf_map_lookup_elem(&icmpcnt, &key);
if (icmp_count)
*icmp_count += 1;
}
/* Write an Ethernet header at @data that mirrors @orig_eth: source and
 * destination MAC addresses are exchanged and the EtherType is copied.
 */
static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
{
	struct ethhdr *new_eth = data;

	/* new source <- old destination, new destination <- old source */
	memcpy(new_eth->h_source, orig_eth->h_dest, ETH_ALEN);
	memcpy(new_eth->h_dest, orig_eth->h_source, ETH_ALEN);
	new_eth->h_proto = orig_eth->h_proto;
}
/* Fold a 32-bit one's-complement accumulator into the final 16-bit
 * Internet checksum (the complement of the folded sum).
 *
 * Adding the two 16-bit halves can itself carry out of bit 15 (for
 * example 0xffff + 0x0001 = 0x10000), so the fold is applied twice; after
 * the second pass the value is guaranteed to fit in 16 bits. Folding only
 * once would silently drop that carry when the result is truncated.
 */
static __always_inline __u16 csum_fold_helper(__u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);
	return (__u16)~csum;
}
/* Checksum @data_size bytes starting at @data_start, seeded with *@csum,
 * and store the folded 16-bit result back into *@csum.
 */
static __always_inline void ipv4_csum(void *data_start, int data_size,
				      __u32 *csum)
{
	__u32 raw_sum = bpf_csum_diff(0, 0, data_start, data_size, *csum);

	*csum = csum_fold_helper(raw_sum);
}
/* Rewrite the frame in @xdp into an ICMPv4 "destination unreachable /
 * fragmentation needed" reply addressed to the original sender.
 *
 * The caller has already truncated the frame. This function grows the
 * head by one IPv4 header plus one ICMP header, builds fresh Ethernet,
 * IPv4 and ICMP headers in front of the original IPv4 header, bumps the
 * counter and returns XDP_TX. XDP_DROP is returned when the head cannot be
 * grown or the resulting frame is shorter than expected.
 */
static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
{
	int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr);

	if (bpf_xdp_adjust_head(xdp, 0 - headroom))
		return XDP_DROP;

	/* data/data_end must be re-read after the head was moved */
	void *data = (void *)(long)xdp->data;
	void *data_end = (void *)(long)xdp->data_end;

	if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end)
		return XDP_DROP;

	struct iphdr *iph, *orig_iph;
	struct icmphdr *icmp_hdr;
	struct ethhdr *orig_eth;
	__u32 csum = 0;
	__u64 off = 0;

	/* the original Ethernet header now sits @headroom bytes in */
	orig_eth = data + headroom;
	swap_mac(data, orig_eth);
	off += sizeof(struct ethhdr);
	iph = data + off;
	off += sizeof(struct iphdr);
	icmp_hdr = data + off;
	off += sizeof(struct icmphdr);
	orig_iph = data + off;

	icmp_hdr->type = ICMP_DEST_UNREACH;
	icmp_hdr->code = ICMP_FRAG_NEEDED;
	/* Clear the whole 4-byte union first so the "unused" half next to
	 * the MTU is zero instead of leftover bytes of the old MAC header.
	 */
	icmp_hdr->un.gateway = 0;
	icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
	icmp_hdr->checksum = 0;
	ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
	icmp_hdr->checksum = csum;

	iph->ttl = DEFAULT_TTL;
	iph->daddr = orig_iph->saddr;
	iph->saddr = orig_iph->daddr;
	iph->version = 4;
	iph->ihl = 5;
	iph->protocol = IPPROTO_ICMP;
	iph->tos = 0;
	/* The new IPv4 header lives in former headroom, so every field has
	 * to be written explicitly; stale id/frag_off bytes could make the
	 * reply look like a fragment.
	 */
	iph->id = 0;
	iph->frag_off = 0;
	iph->tot_len = htons(
		ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr));
	iph->check = 0;
	csum = 0;
	ipv4_csum(iph, sizeof(struct iphdr), &csum);
	iph->check = csum;

	count_icmp();
	return XDP_TX;
}
/* Pass frames of up to MAX_PCKT_SIZE bytes unchanged. Longer frames are
 * truncated to ICMP_TOOBIG_SIZE bytes and turned into an ICMP reply; if
 * the truncation fails the frame is passed on untouched.
 */
static __always_inline int handle_ipv4(struct xdp_md *xdp)
{
	void *pkt_end = (void *)(long)xdp->data_end;
	void *pkt_start = (void *)(long)xdp->data;
	int pckt_size = pkt_end - pkt_start;
	int trim;

	if (pckt_size <= MAX_PCKT_SIZE)
		return XDP_PASS;

	/* number of trailing bytes to cut off */
	trim = pckt_size - ICMP_TOOBIG_SIZE;
	if (bpf_xdp_adjust_tail(xdp, 0 - trim))
		return XDP_PASS;

	return send_icmp4_too_big(xdp);
}
/* XDP entry point: drop frames too short to hold an Ethernet header,
 * hand IPv4 frames to handle_ipv4() and pass everything else.
 */
SEC("xdp_icmp")
int _xdp_icmp(struct xdp_md *xdp)
{
	void *data_end = (void *)(long)xdp->data_end;
	void *data = (void *)(long)xdp->data;
	struct ethhdr *eth = data;

	/* bounds check before touching any header field */
	if (eth + 1 > data_end)
		return XDP_DROP;

	if (eth->h_proto != htons(ETH_P_IP))
		return XDP_PASS;

	return handle_ipv4(xdp);
}
char _license[] SEC("license") = "GPL";
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
#include "bpf_load.h"
#include "libbpf.h"
#include "bpf_util.h"
/* Seconds poll_stats() sleeps between two reads of the counter. */
#define STATS_INTERVAL_S 2U
/* Interface the XDP program is attached to; set from -i, -1 until then. */
static int ifindex = -1;
/* XDP attach flags accumulated from the -S / -N options. */
static __u32 xdp_flags;
/* Signal handler: detach the XDP program from the interface (when one
 * was selected) and terminate the process with status 0.
 */
static void int_exit(int sig)
{
	if (ifindex >= 0)
		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);

	exit(0);
}
/* simple "icmp packet too big sent" counter
*/
static void poll_stats(unsigned int kill_after_s)
{
time_t started_at = time(NULL);
__u64 value = 0;
int key = 0;
while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
sleep(STATS_INTERVAL_S);
assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0);
printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
}
}
/* Print a short description of the tool and its command-line options to
 * stdout. @cmd is the program name shown in the usage line.
 */
static void usage(const char *cmd)
{
	static const char *const option_help[] = {
		" -i <ifindex> Interface Index\n",
		" -T <stop-after-X-seconds> Default: 0 (forever)\n",
		" -S use skb-mode\n",
		" -N enforce native mode\n",
		" -h Display this help\n",
	};
	size_t idx;

	fputs("Start a XDP prog which send ICMP \"packet too big\" \n"
	      "messages if ingress packet is bigger then MAX_SIZE bytes\n",
	      stdout);
	printf("Usage: %s [...]\n", cmd);

	for (idx = 0; idx < sizeof(option_help) / sizeof(option_help[0]); idx++)
		fputs(option_help[idx], stdout);
}
/* Parse the command line, load "<argv[0]>_kern.o", attach its first
 * program to the chosen interface, print the counter periodically and
 * detach the program again before returning.
 *
 * Returns 0 on normal completion and 1 on any usage or setup error.
 */
int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	const char *optstr = "i:T:SNh";
	unsigned char opt_flags[256] = {};
	unsigned int kill_after_s = 0;
	char filename[256];
	const char *c;
	int opt;

	/* Every lowercase option letter except 'h' is mandatory; flag it
	 * here and clear the flag once the option has been seen.
	 */
	for (c = optstr; *c; c++) {
		if (*c == 'h' || *c < 'a' || *c > 'z')
			continue;
		opt_flags[(unsigned char)*c] = 1;
	}

	while ((opt = getopt(argc, argv, optstr)) != -1) {
		if (opt == 'i') {
			ifindex = atoi(optarg);
		} else if (opt == 'T') {
			kill_after_s = atoi(optarg);
		} else if (opt == 'S') {
			xdp_flags |= XDP_FLAGS_SKB_MODE;
		} else if (opt == 'N') {
			xdp_flags |= XDP_FLAGS_DRV_MODE;
		} else {
			/* -h and anything unrecognised */
			usage(argv[0]);
			return 1;
		}
		opt_flags[opt] = 0;
	}

	/* any flag still set means a mandatory option was not given */
	for (c = optstr; *c; c++) {
		if (!opt_flags[(unsigned char)*c])
			continue;
		fprintf(stderr, "Missing argument -%c\n", *c);
		usage(argv[0]);
		return 1;
	}

	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
		return 1;
	}

	/* the BPF object is expected next to the binary, named after it */
	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	if (load_bpf_file(filename)) {
		printf("%s", bpf_log_buf);
		return 1;
	}

	if (!prog_fd[0]) {
		printf("load_bpf_file: %s\n", strerror(errno));
		return 1;
	}

	/* make sure the program is detached on Ctrl-C / termination */
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);

	if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
		printf("link set xdp fd failed\n");
		return 1;
	}

	poll_stats(kill_after_s);

	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
	return 0;
}
...@@ -755,6 +755,13 @@ union bpf_attr { ...@@ -755,6 +755,13 @@ union bpf_attr {
* @addr: pointer to struct sockaddr to bind socket to * @addr: pointer to struct sockaddr to bind socket to
* @addr_len: length of sockaddr structure * @addr_len: length of sockaddr structure
* Return: 0 on success or negative error code * Return: 0 on success or negative error code
*
* int bpf_xdp_adjust_tail(xdp_md, delta)
* Adjust the xdp_md.data_end by delta. Only shrinking of packet's
* size is supported.
* @xdp_md: pointer to xdp_md
* @delta: A negative integer to be added to xdp_md.data_end
* Return: 0 on success or negative on error
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -821,7 +828,8 @@ union bpf_attr { ...@@ -821,7 +828,8 @@ union bpf_attr {
FN(msg_apply_bytes), \ FN(msg_apply_bytes), \
FN(msg_cork_bytes), \ FN(msg_cork_bytes), \
FN(msg_pull_data), \ FN(msg_pull_data), \
FN(bind), FN(bind), \
FN(xdp_adjust_tail),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -31,7 +31,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test ...@@ -31,7 +31,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o
# Order correspond to 'make run_tests' order # Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \ TEST_PROGS := test_kmod.sh \
......
...@@ -96,6 +96,9 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = ...@@ -96,6 +96,9 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data; (void *) BPF_FUNC_msg_pull_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind; (void *) BPF_FUNC_bind;
static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_tail;
/* llvm builtin functions that eBPF C program may use to /* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions * emit BPF_LD_ABS and BPF_LD_IND instructions
...@@ -129,6 +132,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag ...@@ -129,6 +132,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
(void *) BPF_FUNC_l3_csum_replace; (void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace; (void *) BPF_FUNC_l4_csum_replace;
static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
(void *) BPF_FUNC_csum_diff;
static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_skb_under_cgroup; (void *) BPF_FUNC_skb_under_cgroup;
static int (*bpf_skb_change_head)(void *, int len, int flags) = static int (*bpf_skb_change_head)(void *, int len, int flags) =
......
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include "bpf_helpers.h"
int _version SEC("version") = 1;
/* Selftest program for bpf_xdp_adjust_tail(): try to shrink a 54-byte
 * packet by 256 bytes and any other packet by 20 bytes. Returns XDP_DROP
 * when the helper reports an error and XDP_TX when it succeeds.
 */
SEC("xdp_adjust_tail")
int _xdp_adjust_tail(struct xdp_md *xdp)
{
	void *data_end = (void *)(long)xdp->data_end;
	void *data = (void *)(long)xdp->data;
	int shrink_by = (data_end - data == 54) ? 256 : 20;

	return bpf_xdp_adjust_tail(xdp, 0 - shrink_by) ? XDP_DROP : XDP_TX;
}
char _license[] SEC("license") = "GPL";
...@@ -166,6 +166,37 @@ static void test_xdp(void) ...@@ -166,6 +166,37 @@ static void test_xdp(void)
bpf_object__close(obj); bpf_object__close(obj);
} }
/* Load ./test_adjust_tail.o as an XDP program and run it once against
 * pkt_v4 and once against pkt_v6, checking the verdict (and, for the
 * second run, the resulting packet size). A load failure bumps error_cnt
 * and returns early.
 */
static void test_xdp_adjust_tail(void)
{
const char *file = "./test_adjust_tail.o";
struct bpf_object *obj;
/* receives the packet data produced by each test run */
char buf[128];
/* NOTE(review): duration is presumably also read by the CHECK macro,
 * which is defined outside this view - confirm before renaming it.
 */
__u32 duration, retval, size;
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (err) {
error_cnt++;
return;
}
/* first run: the program is expected to return XDP_DROP */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != XDP_DROP,
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
/* second run: expect XDP_TX and an output packet of exactly 54 bytes */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != XDP_TX || size != 54,
"ipv6", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
bpf_object__close(obj);
}
#define MAGIC_VAL 0x1234 #define MAGIC_VAL 0x1234
#define NUM_ITER 100000 #define NUM_ITER 100000
#define VIP_NUM 5 #define VIP_NUM 5
...@@ -1177,6 +1208,7 @@ int main(void) ...@@ -1177,6 +1208,7 @@ int main(void)
{ {
test_pkt_access(); test_pkt_access();
test_xdp(); test_xdp();
test_xdp_adjust_tail();
test_l4lb_all(); test_l4lb_all();
test_xdp_noinline(); test_xdp_noinline();
test_tcp_estats(); test_tcp_estats();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment