Commit 8c2e6c90 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-01-11

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Various BPF related improvements and fixes to nfp driver: i) do
   not register XDP RXQ structure to control queues, ii) round up
   program stack size to word size for nfp, iii) restrict MTU changes
   when BPF offload is active, iv) add more fully featured relocation
   support to JIT, v) add support for signed compare instructions to
   the nfp JIT, vi) export and reuse verifier log routine for nfp, and
   many more, from Jakub, Quentin and Nic.

2) Fix a syzkaller reported GPF in BPF's copy_verifier_state() when
   we hit kmalloc failure path, from Alexei.

3) Add two follow-up fixes for the recent XDP RXQ series: i) kvzalloc()
   allocated memory was only kfree()'ed, and ii) fix a memory leak where
   RX queue was not freed in netif_free_rx_queues(), from Jakub.

4) Add a sample for transferring XDP meta data into the skb, here it
   is used for setting skb->mark with the buffer from XDP, from Jesper.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3d93e337 36e04a2d
......@@ -87,16 +87,21 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
static int
nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
{
struct nfp_bpf_vnic *bv;
int err;
nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
if (!nn->app_priv)
bv = kzalloc(sizeof(*bv), GFP_KERNEL);
if (!bv)
return -ENOMEM;
nn->app_priv = bv;
err = nfp_app_nic_vnic_alloc(app, nn, id);
if (err)
goto err_free_priv;
bv->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
bv->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
return 0;
err_free_priv:
kfree(nn->app_priv);
......@@ -191,7 +196,27 @@ static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
/**
 * nfp_bpf_tc_busy() - report whether a cls_bpf program is loaded on a vNIC
 * @app:	NFP app handle (not used here)
 * @nn:		vNIC whose app_priv points at its &struct nfp_bpf_vnic
 *
 * The answer is taken from the per-vNIC tc_prog pointer rather than from the
 * NFP_NET_CFG_CTRL_BPF control bit; a leftover early return on the control
 * bit previously made this check unreachable.
 *
 * Return: true when a TC program is recorded for the vNIC, false otherwise.
 */
static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn)
{
	struct nfp_bpf_vnic *bv = nn->app_priv;

	return !!bv->tc_prog;
}
/**
 * nfp_bpf_change_mtu() - veto MTU changes that BPF offload cannot handle
 * @app:	NFP app handle (not used here)
 * @netdev:	netdev whose MTU is about to change
 * @new_mtu:	requested MTU
 *
 * Only applies while NFP_NET_CFG_CTRL_BPF is set in the vNIC control word.
 * The limit is derived from the NFP_NET_CFG_BPF_INL_MTU config byte
 * (64B chunks) minus 32 bytes.
 * NOTE(review): the reason for the 32-byte reduction is not visible here.
 *
 * Return: 0 if the change is acceptable, -EBUSY if @new_mtu is over the limit.
 */
static int
nfp_bpf_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu)
{
	struct nfp_net *nn = netdev_priv(netdev);
	unsigned int mtu_limit;

	/* BPF offload not enabled - nothing to restrict. */
	if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)
		return 0;

	mtu_limit = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
	if (new_mtu <= mtu_limit)
		return 0;

	nn_info(nn, "BPF offload active, MTU over %u not supported\n",
		mtu_limit);
	return -EBUSY;
}
static int
......@@ -311,6 +336,8 @@ const struct nfp_app_type app_bpf = {
.init = nfp_bpf_init,
.clean = nfp_bpf_clean,
.change_mtu = nfp_bpf_change_mtu,
.extra_cap = nfp_bpf_extra_cap,
.vnic_alloc = nfp_bpf_vnic_alloc,
......@@ -318,9 +345,6 @@ const struct nfp_app_type app_bpf = {
.setup_tc = nfp_bpf_setup_tc,
.tc_busy = nfp_bpf_tc_busy,
.bpf = nfp_ndo_bpf,
.xdp_offload = nfp_bpf_xdp_offload,
.bpf_verifier_prep = nfp_bpf_verifier_prep,
.bpf_translate = nfp_bpf_translate,
.bpf_destroy = nfp_bpf_destroy,
};
......@@ -42,17 +42,28 @@
#include "../nfp_asm.h"
/* For branch fixup logic use up-most byte of branch instruction as scratch
/* For relocation logic use up-most byte of branch instruction as scratch
* area. Remember to clear this before sending instructions to HW!
*/
#define OP_BR_SPECIAL 0xff00000000000000ULL
enum br_special {
OP_BR_NORMAL = 0,
OP_BR_GO_OUT,
OP_BR_GO_ABORT,
#define OP_RELO_TYPE 0xff00000000000000ULL
enum nfp_relo_type {
RELO_NONE = 0,
/* standard internal jumps */
RELO_BR_REL,
/* internal jumps to parts of the outro */
RELO_BR_GO_OUT,
RELO_BR_GO_ABORT,
/* external jumps to fixed addresses */
RELO_BR_NEXT_PKT,
};
/* To make absolute relocated branches (branches other than RELO_BR_REL)
* distinguishable in user space dumps from normal jumps, add a large offset
* to them.
*/
#define BR_OFF_RELO 15000
enum static_regs {
STATIC_REG_IMM = 21, /* Bank AB */
STATIC_REG_STACK = 22, /* Bank A */
......@@ -191,11 +202,9 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
* @__prog_alloc_len: alloc size of @prog array
* @verifier_meta: temporary storage for verifier's insn meta
* @type: BPF program type
* @start_off: address of the first instruction in the memory
* @last_bpf_off: address of the last instruction translated from BPF
* @tgt_out: jump target for normal exit
* @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
* @tgt_done: jump target to get the next packet
* @n_translated: number of successfully translated instructions (for errors)
* @error: error code if something went wrong
* @stack_depth: max stack depth from the verifier
......@@ -213,11 +222,9 @@ struct nfp_prog {
enum bpf_prog_type type;
unsigned int start_off;
unsigned int last_bpf_off;
unsigned int tgt_out;
unsigned int tgt_abort;
unsigned int tgt_done;
unsigned int n_translated;
int error;
......@@ -231,11 +238,16 @@ struct nfp_prog {
/**
 * struct nfp_bpf_vnic - per-vNIC BPF priv structure
 * @tc_prog:	currently loaded cls_bpf program
 * @start_off:	address of the first instruction in the memory
 * @tgt_done:	jump target to get the next packet
 *
 * One instance is allocated per vNIC by nfp_bpf_vnic_alloc() and stored in
 * the vNIC's app_priv pointer. @start_off and @tgt_done are filled in there
 * from the NFP_NET_CFG_BPF_START and NFP_NET_CFG_BPF_DONE config words.
 */
struct nfp_bpf_vnic {
struct bpf_prog *tc_prog;
unsigned int start_off;
unsigned int tgt_done;
};
void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt);
int nfp_bpf_jit(struct nfp_prog *prog);
extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
......@@ -244,16 +256,14 @@ struct netdev_bpf;
struct nfp_app;
struct nfp_net;
int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf);
int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
bool old_prog);
int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf);
int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns);
void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
#endif
......@@ -42,6 +42,7 @@
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
......@@ -70,23 +71,7 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
list_add_tail(&meta->l, &nfp_prog->insns);
}
/* Another pass to record jump information. */
list_for_each_entry(meta, &nfp_prog->insns, l) {
u64 code = meta->insn.code;
if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
BPF_OP(code) != BPF_CALL) {
struct nfp_insn_meta *dst_meta;
unsigned short dst_indx;
dst_indx = meta->n + 1 + meta->insn.off;
dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
cnt);
meta->jmp_dst = dst_meta;
dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
}
}
nfp_bpf_jit_prepare(nfp_prog, cnt);
return 0;
}
......@@ -102,8 +87,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
kfree(nfp_prog);
}
int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf)
static int
nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf)
{
struct bpf_prog *prog = bpf->verifier.prog;
struct nfp_prog *nfp_prog;
......@@ -133,8 +119,7 @@ int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
return ret;
}
int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog)
static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
{
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
unsigned int stack_size;
......@@ -146,37 +131,48 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
prog->aux->stack_depth, stack_size);
return -EOPNOTSUPP;
}
nfp_prog->stack_depth = prog->aux->stack_depth;
nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4);
max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
nfp_prog->__prog_alloc_len = max_instr * sizeof(u64);
nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL);
nfp_prog->prog = kvmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL);
if (!nfp_prog->prog)
return -ENOMEM;
return nfp_bpf_jit(nfp_prog);
}
int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog)
static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
{
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
kfree(nfp_prog->prog);
kvfree(nfp_prog->prog);
nfp_prog_free(nfp_prog);
return 0;
}
/**
 * nfp_ndo_bpf() - dispatch BPF offload commands for the BPF app
 * @app:	NFP app handle
 * @nn:		vNIC the command targets
 * @bpf:	command descriptor; @bpf->command selects the handler
 *
 * Return: the selected handler's result, or -EINVAL for any command other
 * than verifier prep, translate and destroy.
 */
int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
{
	int ret = -EINVAL;

	switch (bpf->command) {
	case BPF_OFFLOAD_VERIFIER_PREP:
		ret = nfp_bpf_verifier_prep(app, nn, bpf);
		break;
	case BPF_OFFLOAD_TRANSLATE:
		ret = nfp_bpf_translate(nn, bpf->offload.prog);
		break;
	case BPF_OFFLOAD_DESTROY:
		ret = nfp_bpf_destroy(nn, bpf->offload.prog);
		break;
	default:
		/* unknown command - keep -EINVAL */
		break;
	}

	return ret;
}
static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
{
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
unsigned int max_mtu;
dma_addr_t dma_addr;
void *img;
int err;
max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
......@@ -185,11 +181,17 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
return -EOPNOTSUPP;
}
dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog,
img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv);
if (IS_ERR(img))
return PTR_ERR(img);
dma_addr = dma_map_single(nn->dp.dev, img,
nfp_prog->prog_len * sizeof(u64),
DMA_TO_DEVICE);
if (dma_mapping_error(nn->dp.dev, dma_addr))
if (dma_mapping_error(nn->dp.dev, dma_addr)) {
kfree(img);
return -ENOMEM;
}
nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len);
nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr);
......@@ -201,6 +203,7 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64),
DMA_TO_DEVICE);
kfree(img);
return err;
}
......
......@@ -31,8 +31,6 @@
* SOFTWARE.
*/
#define pr_fmt(fmt) "NFP net bpf: " fmt
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/kernel.h>
......@@ -41,6 +39,9 @@
#include "fw.h"
#include "main.h"
#define pr_vlog(env, fmt, ...) \
bpf_verifier_log_write(env, "[nfp] " fmt, ##__VA_ARGS__)
struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns)
......@@ -116,18 +117,18 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
switch (func_id) {
case BPF_FUNC_xdp_adjust_head:
if (!bpf->adjust_head.off_max) {
pr_warn("adjust_head not supported by FW\n");
pr_vlog(env, "adjust_head not supported by FW\n");
return -EOPNOTSUPP;
}
if (!(bpf->adjust_head.flags & NFP_BPF_ADJUST_HEAD_NO_META)) {
pr_warn("adjust_head: FW requires shifting metadata, not supported by the driver\n");
pr_vlog(env, "adjust_head: FW requires shifting metadata, not supported by the driver\n");
return -EOPNOTSUPP;
}
nfp_record_adjust_head(bpf, nfp_prog, meta, reg2);
break;
default:
pr_warn("unsupported function id: %d\n", func_id);
pr_vlog(env, "unsupported function id: %d\n", func_id);
return -EOPNOTSUPP;
}
......@@ -150,7 +151,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off);
pr_info("unsupported exit state: %d, var_off: %s\n",
pr_vlog(env, "unsupported exit state: %d, var_off: %s\n",
reg0->type, tn_buf);
return -EINVAL;
}
......@@ -160,7 +161,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
imm <= TC_ACT_REDIRECT &&
imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN &&
imm != TC_ACT_QUEUED) {
pr_info("unsupported exit state: %d, imm: %llx\n",
pr_vlog(env, "unsupported exit state: %d, imm: %llx\n",
reg0->type, imm);
return -EINVAL;
}
......@@ -171,12 +172,13 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
static int
nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog,
struct nfp_insn_meta *meta,
const struct bpf_reg_state *reg)
const struct bpf_reg_state *reg,
struct bpf_verifier_env *env)
{
s32 old_off, new_off;
if (!tnum_is_const(reg->var_off)) {
pr_info("variable ptr stack access\n");
pr_vlog(env, "variable ptr stack access\n");
return -EINVAL;
}
......@@ -194,7 +196,7 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog,
if (old_off % 4 == new_off % 4)
return 0;
pr_info("stack access changed location was:%d is:%d\n",
pr_vlog(env, "stack access changed location was:%d is:%d\n",
old_off, new_off);
return -EINVAL;
}
......@@ -209,18 +211,18 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
if (reg->type != PTR_TO_CTX &&
reg->type != PTR_TO_STACK &&
reg->type != PTR_TO_PACKET) {
pr_info("unsupported ptr type: %d\n", reg->type);
pr_vlog(env, "unsupported ptr type: %d\n", reg->type);
return -EINVAL;
}
if (reg->type == PTR_TO_STACK) {
err = nfp_bpf_check_stack_access(nfp_prog, meta, reg);
err = nfp_bpf_check_stack_access(nfp_prog, meta, reg, env);
if (err)
return err;
}
if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) {
pr_info("ptr type changed for instruction %d -> %d\n",
pr_vlog(env, "ptr type changed for instruction %d -> %d\n",
meta->ptr.type, reg->type);
return -EINVAL;
}
......@@ -241,7 +243,7 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (meta->insn.src_reg >= MAX_BPF_REG ||
meta->insn.dst_reg >= MAX_BPF_REG) {
pr_err("program uses extended registers - jit hardening?\n");
pr_vlog(env, "program uses extended registers - jit hardening?\n");
return -EINVAL;
}
......
......@@ -82,15 +82,15 @@ extern const struct nfp_app_type app_flower;
* @repr_clean: representor about to be unregistered
* @repr_open: representor netdev open callback
* @repr_stop: representor netdev stop callback
* @change_mtu: MTU change on a netdev has been requested (veto-only, change
* is not guaranteed to be committed)
* @start: start application logic
* @stop: stop application logic
* @ctrl_msg_rx: control message handler
* @setup_tc: setup TC ndo
* @tc_busy: TC HW offload busy (rules loaded)
* @bpf: BPF ndo offload-related calls
* @xdp_offload: offload an XDP program
* @bpf_verifier_prep: verifier prep for dev-specific BPF programs
* @bpf_translate: translate call for dev-specific BPF programs
* @bpf_destroy: destroy for dev-specific BPF programs
* @eswitch_mode_get: get SR-IOV eswitch mode
* @sriov_enable: app-specific sriov initialisation
* @sriov_disable: app-specific sriov clean-up
......@@ -120,6 +120,9 @@ struct nfp_app_type {
int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr);
int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr);
int (*change_mtu)(struct nfp_app *app, struct net_device *netdev,
int new_mtu);
int (*start)(struct nfp_app *app);
void (*stop)(struct nfp_app *app);
......@@ -128,14 +131,10 @@ struct nfp_app_type {
int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
enum tc_setup_type type, void *type_data);
bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn);
int (*bpf)(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *xdp);
int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf);
int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
int (*sriov_enable)(struct nfp_app *app, int num_vfs);
void (*sriov_disable)(struct nfp_app *app);
......@@ -242,6 +241,14 @@ nfp_app_repr_clean(struct nfp_app *app, struct net_device *netdev)
app->type->repr_clean(app, netdev);
}
/* Ask the app whether an MTU change may proceed (veto-only hook).
 * Returns 0 when there is no app or the app has no change_mtu callback,
 * otherwise whatever the callback returns.
 */
static inline int
nfp_app_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu)
{
	if (app && app->type->change_mtu)
		return app->type->change_mtu(app, netdev, new_mtu);

	return 0;
}
static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl)
{
app->ctrl = ctrl;
......@@ -303,6 +310,14 @@ static inline int nfp_app_setup_tc(struct nfp_app *app,
return app->type->setup_tc(app, netdev, type, type_data);
}
/* Forward a BPF ndo command to the app's bpf callback.
 * Returns -EINVAL when there is no app or the app has no bpf callback,
 * otherwise whatever the callback returns.
 */
static inline int nfp_app_bpf(struct nfp_app *app, struct nfp_net *nn,
			      struct netdev_bpf *bpf)
{
	if (app && app->type->bpf)
		return app->type->bpf(app, nn, bpf);

	return -EINVAL;
}
static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog)
{
......@@ -311,33 +326,6 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
return app->type->xdp_offload(app, nn, prog);
}
static inline int
nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf)
{
if (!app || !app->type->bpf_verifier_prep)
return -EOPNOTSUPP;
return app->type->bpf_verifier_prep(app, nn, bpf);
}
static inline int
nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog)
{
if (!app || !app->type->bpf_translate)
return -EOPNOTSUPP;
return app->type->bpf_translate(app, nn, prog);
}
static inline int
nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog)
{
if (!app || !app->type->bpf_destroy)
return -EOPNOTSUPP;
return app->type->bpf_destroy(app, nn, prog);
}
static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
{
trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0,
......
......@@ -50,6 +50,36 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
[CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
};
/* Extract the branch target from a branch instruction word.
 * The target is stored split across the OP_BR_ADDR_HI and OP_BR_ADDR_LO
 * fields; the high part is scaled past the range of the low field and the
 * two are merged.
 */
u16 br_get_offset(u64 instr)
{
	/* One more than the largest value the low address field can hold. */
	const u64 lo_span = (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)) + 1;
	u16 lo = FIELD_GET(OP_BR_ADDR_LO, instr);
	u16 hi = FIELD_GET(OP_BR_ADDR_HI, instr);

	return (hi * lo_span) | lo;
}
/* Store @offset as the branch target of the instruction at @instr.
 * The part of @offset that fits the low address field goes into
 * OP_BR_ADDR_LO; OP_BR_ADDR_HI receives 1 if any bits were left over and
 * 0 otherwise. All other bits of the instruction are preserved.
 */
void br_set_offset(u64 *instr, u16 offset)
{
	const u16 lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
	const u16 hi = offset != lo;
	u64 word = *instr & ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO);

	word |= FIELD_PREP(OP_BR_ADDR_HI, hi);
	word |= FIELD_PREP(OP_BR_ADDR_LO, lo);
	*instr = word;
}
/* Advance the branch target encoded in @instr by @offset (read-modify-write
 * via br_get_offset() / br_set_offset()).
 */
void br_add_offset(u64 *instr, u16 offset)
{
	br_set_offset(instr, br_get_offset(*instr) + offset);
}
static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst)
{
bool lm_id, lm_dec = false;
......
......@@ -81,6 +81,7 @@ enum br_mask {
BR_BHS = 0x04,
BR_BLO = 0x05,
BR_BGE = 0x08,
BR_BLT = 0x09,
BR_UNC = 0x18,
};
......@@ -93,6 +94,10 @@ enum br_ctx_signal_state {
BR_CSS_NONE = 2,
};
u16 br_get_offset(u64 instr);
void br_set_offset(u64 *instr, u16 offset);
void br_add_offset(u64 *instr, u16 offset);
#define OP_BBYTE_BASE 0x0c800000000ULL
#define OP_BB_A_SRC 0x000000000ffULL
#define OP_BB_BYTE 0x00000000300ULL
......
......@@ -2253,7 +2253,8 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
if (dp->netdev)
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
kfree(rx_ring->rxbufs);
if (rx_ring->rxds)
......@@ -2279,9 +2280,12 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
{
int sz, err;
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx);
if (err < 0)
return err;
if (dp->netdev) {
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
rx_ring->idx);
if (err < 0)
return err;
}
rx_ring->cnt = dp->rxd_cnt;
rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
......@@ -3045,6 +3049,11 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
{
struct nfp_net *nn = netdev_priv(netdev);
struct nfp_net_dp *dp;
int err;
err = nfp_app_change_mtu(nn->app, netdev, new_mtu);
if (err)
return err;
dp = nfp_net_clone_dp(nn);
if (!dp)
......@@ -3405,16 +3414,8 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
return 0;
case BPF_OFFLOAD_VERIFIER_PREP:
return nfp_app_bpf_verifier_prep(nn->app, nn, xdp);
case BPF_OFFLOAD_TRANSLATE:
return nfp_app_bpf_translate(nn->app, nn,
xdp->offload.prog);
case BPF_OFFLOAD_DESTROY:
return nfp_app_bpf_destroy(nn->app, nn,
xdp->offload.prog);
default:
return -EINVAL;
return nfp_app_bpf(nn->app, nn, xdp);
}
}
......
......@@ -91,23 +91,24 @@
#define NFP_NET_RSS_IPV6_EX_UDP 9
/**
* @NFP_NET_TXR_MAX: Maximum number of TX rings
* @NFP_NET_RXR_MAX: Maximum number of RX rings
* Ring counts
* %NFP_NET_TXR_MAX: Maximum number of TX rings
* %NFP_NET_RXR_MAX: Maximum number of RX rings
*/
#define NFP_NET_TXR_MAX 64
#define NFP_NET_RXR_MAX 64
/**
* Read/Write config words (0x0000 - 0x002c)
* @NFP_NET_CFG_CTRL: Global control
* @NFP_NET_CFG_UPDATE: Indicate which fields are updated
* @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
* @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
* @NFP_NET_CFG_MTU: Set MTU size
* @NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU)
* @NFP_NET_CFG_EXN: MSI-X table entry for exceptions
* @NFP_NET_CFG_LSC: MSI-X table entry for link state changes
* @NFP_NET_CFG_MACADDR: MAC address
* %NFP_NET_CFG_CTRL: Global control
* %NFP_NET_CFG_UPDATE: Indicate which fields are updated
* %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
* %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
* %NFP_NET_CFG_MTU: Set MTU size
* %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU)
* %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
* %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
* %NFP_NET_CFG_MACADDR: MAC address
*
* TODO:
* - define Error details in UPDATE
......@@ -176,14 +177,14 @@
/**
* Read-only words (0x0030 - 0x0050):
* @NFP_NET_CFG_VERSION: Firmware version number
* @NFP_NET_CFG_STS: Status
* @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL)
* @NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
* @NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
* @NFP_NET_CFG_MAX_MTU: Maximum support MTU
* @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only)
* @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only)
* %NFP_NET_CFG_VERSION: Firmware version number
* %NFP_NET_CFG_STS: Status
* %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
* %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
* %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
* %NFP_NET_CFG_MAX_MTU: Maximum support MTU
* %NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only)
* %NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only)
*
* TODO:
* - define more STS bits
......@@ -228,31 +229,31 @@
/**
* RSS capabilities
* @NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
* @NFP_NET_CFG_RSS_HFUNC)
* %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
* %NFP_NET_CFG_RSS_HFUNC)
*/
#define NFP_NET_CFG_RSS_CAP 0x0054
#define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000
/**
* VXLAN/UDP encap configuration
* @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports
* @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes
* %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports
* %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes
*/
#define NFP_NET_CFG_VXLAN_PORT 0x0060
#define NFP_NET_CFG_VXLAN_SZ 0x0008
/**
* BPF section
* @NFP_NET_CFG_BPF_ABI: BPF ABI version
* @NFP_NET_CFG_BPF_CAP: BPF capabilities
* @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes
* @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded
* @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit
* @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks
* @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks
* @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions
* @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code
* %NFP_NET_CFG_BPF_ABI: BPF ABI version
* %NFP_NET_CFG_BPF_CAP: BPF capabilities
* %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes
* %NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded
* %NFP_NET_CFG_BPF_DONE: Offset to jump to on exit
* %NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks
* %NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks
* %NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions
* %NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code
*/
#define NFP_NET_CFG_BPF_ABI 0x0080
#define NFP_NET_BPF_ABI 2
......@@ -278,9 +279,9 @@
/**
* RSS configuration (0x0100 - 0x01ac):
* Used only when NFP_NET_CFG_CTRL_RSS is enabled
* @NFP_NET_CFG_RSS_CFG: RSS configuration word
* @NFP_NET_CFG_RSS_KEY: RSS "secret" key
* @NFP_NET_CFG_RSS_ITBL: RSS indirection table
* %NFP_NET_CFG_RSS_CFG: RSS configuration word
* %NFP_NET_CFG_RSS_KEY: RSS "secret" key
* %NFP_NET_CFG_RSS_ITBL: RSS indirection table
*/
#define NFP_NET_CFG_RSS_BASE 0x0100
#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
......@@ -305,13 +306,13 @@
/**
* TX ring configuration (0x200 - 0x800)
* @NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration
* @NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries)
* @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
* @NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries)
* @NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries)
* @NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries)
* @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
* %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration
* %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries)
* %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
* %NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries)
* %NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries)
* %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries)
* %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
*/
#define NFP_NET_CFG_TXR_BASE 0x0200
#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
......@@ -325,12 +326,12 @@
/**
* RX ring configuration (0x0800 - 0x0c00)
* @NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration
* @NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries)
* @NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries)
* @NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries)
* @NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries)
* @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
* %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration
* %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries)
* %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries)
* %NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries)
* %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries)
* %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
*/
#define NFP_NET_CFG_RXR_BASE 0x0800
#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
......@@ -343,7 +344,7 @@
/**
* Interrupt Control/Cause registers (0x0c00 - 0x0d00)
* These registers are only used when MSI-X auto-masking is not
* enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index
* enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index
* by MSI-X entry and are 1B in size. If an entry is zero, the
* corresponding entry is enabled. If the FW generates an interrupt,
* it writes a cause into the corresponding field. This also masks
......@@ -393,8 +394,8 @@
/**
* Per ring stats (0x1000 - 0x1800)
* options, 64bit per entry
* @NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
* @NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
* %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
* %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
*/
#define NFP_NET_CFG_TXR_STATS_BASE 0x1000
#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
......@@ -418,10 +419,10 @@
/**
* VLAN filtering using general use mailbox
* @NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox
* @NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter
* @NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter
* @NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes
* %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox
* %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter
* %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter
* %NFP_NET_CFG_VLAN_FILTER_SZ: Size of the VLAN filter mailbox in bytes
*/
#define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_VAL
#define NFP_NET_CFG_VLAN_FILTER_VID NFP_NET_CFG_VLAN_FILTER
......
......@@ -186,6 +186,13 @@ nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
return -EINVAL;
}
/* ndo_change_mtu for representors: defer the decision to the app's
 * change_mtu hook through nfp_app_change_mtu() and return its result.
 */
static int nfp_repr_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nfp_repr *priv = netdev_priv(netdev);
	struct nfp_app *app = priv->app;

	return nfp_app_change_mtu(app, netdev, new_mtu);
}
static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct nfp_repr *repr = netdev_priv(netdev);
......@@ -240,6 +247,7 @@ const struct net_device_ops nfp_repr_netdev_ops = {
.ndo_open = nfp_repr_open,
.ndo_stop = nfp_repr_stop,
.ndo_start_xmit = nfp_repr_xmit,
.ndo_change_mtu = nfp_repr_change_mtu,
.ndo_get_stats64 = nfp_repr_get_stats64,
.ndo_has_offload_stats = nfp_repr_has_offload_stats,
.ndo_get_offload_stats = nfp_repr_get_offload_stats,
......
......@@ -89,6 +89,7 @@ struct nfp_repr {
* @NFP_REPR_TYPE_PHYS_PORT: external NIC port
* @NFP_REPR_TYPE_PF: physical function
* @NFP_REPR_TYPE_VF: virtual function
* @__NFP_REPR_TYPE_MAX: number of representor types
*/
enum nfp_repr_type {
NFP_REPR_TYPE_PHYS_PORT,
......
......@@ -192,6 +192,9 @@ struct bpf_verifier_env {
u32 subprog_cnt;
};
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
......
......@@ -169,11 +169,11 @@ struct bpf_call_arg_meta {
static DEFINE_MUTEX(bpf_verifier_lock);
/* log_level controls verbosity level of eBPF verifier.
* verbose() is used to dump the verification trace to the log, so the user
* can figure out what's wrong with the program
* bpf_verifier_log_write() is used to dump the verification trace to the log,
* so the user can figure out what's wrong with the program
*/
static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
const char *fmt, ...)
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...)
{
struct bpf_verifer_log *log = &env->log;
unsigned int n;
......@@ -197,6 +197,14 @@ static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
else
log->ubuf = NULL;
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
/* Historically bpf_verifier_log_write was called verbose, but the name was too
* generic for symbol export. The function was renamed, but not the calls in
* the verifier to avoid complicating backports. Hence the alias below.
*/
static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
const char *fmt, ...)
__attribute__((alias("bpf_verifier_log_write")));
static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
......@@ -375,6 +383,8 @@ static int realloc_func_state(struct bpf_func_state *state, int size,
/* Release a function state and its stack array; a NULL @state is ignored. */
static void free_func_state(struct bpf_func_state *state)
{
	if (state) {
		kfree(state->stack);
		kfree(state);
	}
}
......@@ -487,6 +497,8 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
}
return &elem->st;
err:
free_verifier_state(env->cur_state, true);
env->cur_state = NULL;
/* pop all elements and return */
while (!pop_stack(env, NULL, NULL));
return NULL;
......
......@@ -7657,7 +7657,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
/* Rollback successful reg's and free other resources */
while (i--)
xdp_rxq_info_unreg(&rx[i].xdp_rxq);
kfree(dev->_rx);
kvfree(dev->_rx);
dev->_rx = NULL;
return err;
}
......@@ -7665,16 +7665,15 @@ static int netif_alloc_rx_queues(struct net_device *dev)
/*
 * Unregister the XDP RX-queue info of each of the dev->num_rx_queues
 * entries in dev->_rx, then release the array with kvfree().
 *
 * Does nothing when dev->_rx is NULL.
 */
static void netif_free_rx_queues(struct net_device *dev)
{
	unsigned int i, count = dev->num_rx_queues;

	/* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
	if (!dev->_rx)
		return;

	/* Exactly one unreg per queue; nothing may touch index 'count'. */
	for (i = 0; i < count; i++)
		xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);

	kvfree(dev->_rx);
}
static void netdev_init_one_queue(struct net_device *dev,
......
......@@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
......
#!/bin/bash
#
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
#
# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load
# eBPF programs, both for XDP and clsbpf. Shell script function
# wrappers and even long options parsing is illustrated, for ease of
# use.
#
# Related to samples/bpf/xdp2skb_meta_kern.c, which contains BPF-progs
# that need to collaborate between XDP and TC hooks. Thus, it is
# convenient that the same tool load both programs that need to work
# together.
#
# Name of the BPF object file; it is looked up in the script's own directory
BPF_FILE=xdp2skb_meta_kern.o
# Directory holding this script ($0 is quoted so paths with spaces work)
DIR=$(dirname "$0")
# Tools run through the call_tc / call_ip wrappers
export TC=/usr/sbin/tc
export IP=/usr/sbin/ip
# Print the command-line synopsis and every accepted option to stdout.
function usage() {
    echo ""
    echo "Usage: $0 [-vfh] --dev ethX"
    echo " -d | --dev : Network device (required)"
    echo " -f | --flush : Cleanup flush TC and XDP progs"
    echo " --list : (\$LIST) List TC and XDP progs"
    echo " -v | --verbose : (\$VERBOSE) Verbose"
    echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)"
    echo " -h | --help : Show this usage text"
    echo ""
}
## -- General shell logging cmds --
# Print "ERROR: <message...>" on stderr and terminate the script.
#   $1   - exit code to terminate with
#   $2.. - words of the message
err() {
    local rc=$1
    shift
    echo "ERROR: $@" 1>&2
    exit $rc
}
# Print the arguments prefixed with "# ", but only when $VERBOSE is non-empty.
info() {
    if [[ -z "$VERBOSE" ]]; then
        return 0
    fi
    echo "# $@"
}
## -- Helper function calls --
# Common runner behind the TC and IP wrappers.
#   $1   - command to execute
#   $2   - empty: a failing command aborts the script via err (exit code 2)
#          non-empty: a failing command is tolerated
#   $3.. - arguments handed to the command
# With $VERBOSE set the command line is echoed first; with $DRYRUN set
# nothing is executed at all.
function _call_cmd() {
    local tool="$1"
    local tolerate="$2"
    shift 2

    if [[ -n "$VERBOSE" ]]; then
        echo "$(basename $tool) $@"
    fi
    if [[ -n "$DRYRUN" ]]; then
        return 0
    fi

    $tool "$@"
    local rc=$?
    if (( rc == 0 )) || [[ -n "$tolerate" ]]; then
        return 0
    fi
    # Show the offending command before giving up
    err 2 "Exec error($rc) occurred cmd: \"$tool $@\""
}
# Run $TC with the given arguments; a failing command aborts the script.
call_tc()
{
    _call_cmd "$TC" "" "$@"
}
# Run $TC with the given arguments; a failing command is tolerated.
call_tc_allow_fail()
{
    _call_cmd "$TC" "allow_fail" "$@"
}
# Run $IP with the given arguments; a failing command aborts the script.
call_ip()
{
    _call_cmd "$IP" "" "$@"
}
## --- Parse command line arguments / parameters ---
# Using external program "getopt" to get --long-options
# Let getopt(1) normalize short and long options into $OPTIONS
if ! OPTIONS=$(getopt -o vfhd: \
    --long verbose,flush,help,list,dev:,dry-run -- "$@"); then
    err 4 "Error calling getopt"
fi
eval set -- "$OPTIONS"

# DEV and FLUSH only come from the command line; LIST, VERBOSE and DRYRUN
# are not unset here, so values already present in the environment remain.
unset DEV
unset FLUSH
while :; do
    case "$1" in
        -h | --help )
            usage
            exit 0
            ;;
        -v | --verbose )
            VERBOSE=yes
            shift
            ;;
        --dry-run )
            DRYRUN=yes
            VERBOSE=yes
            info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2
            shift
            ;;
        -d | --dev ) # device
            DEV=$2
            info "Device set to: DEV=$DEV" >&2
            shift 2
            ;;
        -f | --flush )
            FLUSH=yes
            shift
            ;;
        --list )
            LIST=yes
            shift
            ;;
        -- | * )
            # End of options (or anything unexpected): stop parsing
            shift
            break
            ;;
    esac
done
# The BPF object must sit next to this script
FILE="$DIR/$BPF_FILE"
[[ -e $FILE ]] || err 3 "Missing BPF object file ($FILE)"

# A network device is mandatory
if [[ -z $DEV ]]; then
    usage
    err 2 "Please specify network device -- required option --dev"
fi
## -- Function calls --
# Show the TC ingress filters of a device.
#   $1 - network device
list_tc() {
    local dev="$1"
    shift
    info "Listing current TC ingress rules"
    call_tc filter show dev $dev ingress
}
# Show the xdp-related lines of "ip link show" for a device.
#   $1 - network device
list_xdp() {
    local dev="$1"
    shift
    info "Listing current XDP device($dev) setting"
    call_ip link show dev $dev | grep --color=auto xdp
}
# Remove the ingress filters and the clsact qdisc from a device.
# Either removal is allowed to fail.
#   $1 - network device
flush_tc() {
    local dev="$1"
    shift
    info "Flush TC on device: $dev"
    call_tc_allow_fail filter del dev $dev ingress
    call_tc_allow_fail qdisc del dev $dev clsact
}
# Turn XDP off on a device.
#   $1 - network device
flush_xdp() {
    local dev="$1"
    shift
    info "Flush XDP on device: $dev"
    call_ip link set dev $dev xdp off
}
# Attach section "tc_mark" of a BPF object to the TC ingress hook.
#   $1 - network device
#   $2 - BPF object file
attach_tc_mark() {
    local dev="$1"
    local obj="$2"
    local section="tc_mark"
    shift 2

    # Re-create the clsact qdisc to clear/flush any existing rules;
    # the delete may fail when no clsact qdisc is present.
    call_tc_allow_fail qdisc del dev $dev clsact 2> /dev/null
    call_tc qdisc add dev $dev clsact

    # Attach BPF prog
    call_tc filter add dev $dev ingress \
        prio 1 handle 1 bpf da obj $obj sec $section
}
# Attach section "xdp_mark" of a BPF object as the device's XDP program.
#   $1 - network device
#   $2 - BPF object file
attach_xdp_mark() {
    local dev="$1"
    local obj="$2"
    local section="xdp_mark"
    shift 2

    # Remove XDP prog in case one is already loaded
    # TODO: Need ip-link option to override/replace existing XDP prog
    flush_xdp $dev

    # Attach XDP/BPF prog
    call_ip link set dev $dev xdp obj $obj sec $section
}
# Dispatch: --flush and --list are terminal actions, otherwise load both progs
if [[ -n $FLUSH ]]; then
    flush_tc $DEV
    flush_xdp $DEV
    exit 0
elif [[ -n $LIST ]]; then
    list_tc $DEV
    list_xdp $DEV
    exit 0
fi

# Default action: attach the TC program and the XDP program
attach_tc_mark $DEV $FILE
attach_xdp_mark $DEV $FILE
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
*
* Example howto transfer info from XDP to SKB, e.g. skb->mark
* -----------------------------------------------------------
* This uses the XDP data_meta infrastructure, and is a cooperation
* between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
*
* Notice: This example does not use the BPF C-loader (bpf_load.c),
* but instead relies on the iproute2 TC tool for loading BPF-objects.
*/
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"
/*
* This struct is stored in the XDP 'data_meta' area, which is located
* just in front of the raw packet payload data. Its meaning is
* specific to the two BPF programs in this file, which use it as a
* communication channel. XDP adjusts/increases the area via a
* bpf-helper, and TC uses boundary checks to see if data has been
* provided.
*
* The struct must be 4 byte aligned, which here is enforced by the
* struct __attribute__((aligned(4))).
*/
struct meta_info {
__u32 mark; /* set by _xdp_mark, copied into ctx->mark by _tc_mark */
} __attribute__((aligned(4)));
/*
 * XDP program: reserve room for a struct meta_info in the data_meta area
 * in front of the packet data and store the mark value 42 in it.
 *
 * Returns XDP_ABORTED when bpf_xdp_adjust_meta() fails or when the
 * meta_info struct does not fit between data_meta and data; returns
 * XDP_PASS otherwise.
 */
SEC("xdp_mark")
int _xdp_mark(struct xdp_md *ctx)
{
	struct meta_info *meta;
	void *data;
	int ret;

	/* Reserve space in front of the data pointer for our meta info.
	 * (Notice drivers not supporting data_meta will fail here!)
	 */
	ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
	if (ret < 0)
		return XDP_ABORTED;

	/* The ctx pointers must be loaded _after_ bpf_xdp_adjust_meta():
	 * the helper changes the packet layout, so the verifier
	 * invalidates packet pointers obtained before the call and
	 * rejects programs that still use them.
	 */
	data = (void *)(unsigned long)ctx->data;

	/* Check data_meta has room for the meta_info struct */
	meta = (void *)(unsigned long)ctx->data_meta;
	if ((void *)(meta + 1) > data)
		return XDP_ABORTED;

	meta->mark = 42;

	return XDP_PASS;
}
/*
 * TC program: copy the mark that the XDP program stored in the
 * data_meta area into ctx->mark.
 *
 * When no complete meta_info struct fits in front of the packet data,
 * ctx->mark is set to 41 instead. Always returns TC_ACT_OK.
 */
SEC("tc_mark")
int _tc_mark(struct __sk_buff *ctx)
{
	void *data = (void *)(unsigned long)ctx->data;
	void *data_meta = (void *)(unsigned long)ctx->data_meta;
	struct meta_info *meta = data_meta;

	/* Check XDP gave us some data_meta */
	if ((void *)(meta + 1) > data) {
		ctx->mark = 41;
		/* Skip "accept" if no data_meta is avail */
		return TC_ACT_OK;
	}

	/* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */
	ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */

	return TC_ACT_OK;
}
/* Manually attaching these programs:
export DEV=ixgbe2
export FILE=xdp2skb_meta_kern.o
# via TC command
tc qdisc del dev $DEV clsact 2> /dev/null
tc qdisc add dev $DEV clsact
tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark
tc filter show dev $DEV ingress
# XDP via IP command:
ip link set dev $DEV xdp off
ip link set dev $DEV xdp obj $FILE sec xdp_mark
# Use iptables to "see" if SKBs are marked
iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29
iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a
# Hint: catch XDP_ABORTED errors via
perf record -e xdp:*
perf script
*/
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment