Commit 01adc485 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Minor conflict, a CHECK was placed into an if() statement
in net-next, whilst a newline was added to that CHECK
call in 'net'.  Thanks to Daniel for the merge resolution.
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 18b338f5 e94fa1d9
This diff is collapsed.
......@@ -483,6 +483,12 @@ Example output from dmesg:
[ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00
[ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3
When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1 and
setting any other value than that will return in failure. This is even the case for
setting bpf_jit_enable to 2, since dumping the final JIT image into the kernel log
is discouraged and introspection through bpftool (under tools/bpf/bpftool/) is the
generally recommended approach instead.
In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for
generating disassembly out of the kernel log's hexdump:
......
......@@ -6,6 +6,7 @@ Contents:
.. toctree::
:maxdepth: 2
af_xdp
batman-adv
can
dpaa2/index
......
......@@ -45,6 +45,7 @@ through bpf(2) and passing a verifier in the kernel, a JIT will then
translate these BPF proglets into native CPU instructions. There are
two flavors of JITs, the newer eBPF JIT currently supported on:
- x86_64
- x86_32
- arm64
- arm32
- ppc64
......
......@@ -2729,7 +2729,6 @@ F: Documentation/networking/filter.txt
F: Documentation/bpf/
F: include/linux/bpf*
F: include/linux/filter.h
F: include/trace/events/bpf.h
F: include/trace/events/xdp.h
F: include/uapi/linux/bpf*
F: include/uapi/linux/filter.h
......@@ -15408,6 +15407,14 @@ T: git git://linuxtv.org/media_tree.git
S: Maintained
F: drivers/media/tuners/tuner-xc2028.*
XDP SOCKETS (AF_XDP)
M: Björn Töpel <bjorn.topel@intel.com>
M: Magnus Karlsson <magnus.karlsson@intel.com>
L: netdev@vger.kernel.org
S: Maintained
F: kernel/bpf/xskmap.c
F: net/xdp/
XEN BLOCK SUBSYSTEM
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
M: Roger Pau Monné <roger.pau@citrix.com>
......
......@@ -1452,83 +1452,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
break;
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
case BPF_LD | BPF_ABS | BPF_W:
case BPF_LD | BPF_ABS | BPF_H:
case BPF_LD | BPF_ABS | BPF_B:
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
case BPF_LD | BPF_IND | BPF_W:
case BPF_LD | BPF_IND | BPF_H:
case BPF_LD | BPF_IND | BPF_B:
{
const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
/* rtn value */
const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
int size;
/* Setting up first argument */
emit(ARM_MOV_R(r0, r4), ctx);
/* Setting up second argument */
emit_a32_mov_i(r1, imm, false, ctx);
if (BPF_MODE(code) == BPF_IND)
emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
false, false, BPF_ADD);
/* Setting up third argument */
switch (BPF_SIZE(code)) {
case BPF_W:
size = 4;
break;
case BPF_H:
size = 2;
break;
case BPF_B:
size = 1;
break;
default:
return -EINVAL;
}
emit_a32_mov_i(r2, size, false, ctx);
/* Setting up fourth argument */
emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
/* Setting up function pointer to call */
emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
emit_blx_r(r6, ctx);
emit(ARM_EOR_R(r1, r1, r1), ctx);
/* Check if return address is NULL or not.
* if NULL then jump to epilogue
* else continue to load the value from retn address
*/
emit(ARM_CMP_I(r0, 0), ctx);
jmp_offset = epilogue_offset(ctx);
check_imm24(jmp_offset);
_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
/* Load value from the address */
switch (BPF_SIZE(code)) {
case BPF_W:
emit(ARM_LDR_I(r0, r0, 0), ctx);
emit_rev32(r0, r0, ctx);
break;
case BPF_H:
emit(ARM_LDRH_I(r0, r0, 0), ctx);
emit_rev16(r0, r0, ctx);
break;
case BPF_B:
emit(ARM_LDRB_I(r0, r0, 0), ctx);
/* No need to reverse */
break;
}
break;
}
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_W:
case BPF_ST | BPF_MEM | BPF_H:
......
......@@ -723,71 +723,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
break;
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
case BPF_LD | BPF_ABS | BPF_W:
case BPF_LD | BPF_ABS | BPF_H:
case BPF_LD | BPF_ABS | BPF_B:
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
case BPF_LD | BPF_IND | BPF_W:
case BPF_LD | BPF_IND | BPF_H:
case BPF_LD | BPF_IND | BPF_B:
{
const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
int size;
emit(A64_MOV(1, r1, r6), ctx);
emit_a64_mov_i(0, r2, imm, ctx);
if (BPF_MODE(code) == BPF_IND)
emit(A64_ADD(0, r2, r2, src), ctx);
switch (BPF_SIZE(code)) {
case BPF_W:
size = 4;
break;
case BPF_H:
size = 2;
break;
case BPF_B:
size = 1;
break;
default:
return -EINVAL;
}
emit_a64_mov_i64(r3, size, ctx);
emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
emit(A64_BLR(r5), ctx);
emit(A64_MOV(1, r0, A64_R(0)), ctx);
jmp_offset = epilogue_offset(ctx);
check_imm19(jmp_offset);
emit(A64_CBZ(1, r0, jmp_offset), ctx);
emit(A64_MOV(1, r5, r0), ctx);
switch (BPF_SIZE(code)) {
case BPF_W:
emit(A64_LDR32(r0, r5, A64_ZR), ctx);
#ifndef CONFIG_CPU_BIG_ENDIAN
emit(A64_REV32(0, r0, r0), ctx);
#endif
break;
case BPF_H:
emit(A64_LDRH(r0, r5, A64_ZR), ctx);
#ifndef CONFIG_CPU_BIG_ENDIAN
emit(A64_REV16(0, r0, r0), ctx);
#endif
break;
case BPF_B:
emit(A64_LDRB(r0, r5, A64_ZR), ctx);
break;
}
break;
}
default:
pr_err_once("unknown opcode %02x\n", code);
return -EINVAL;
......
......@@ -1267,110 +1267,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
return -EINVAL;
break;
case BPF_LD | BPF_B | BPF_ABS:
case BPF_LD | BPF_H | BPF_ABS:
case BPF_LD | BPF_W | BPF_ABS:
case BPF_LD | BPF_DW | BPF_ABS:
ctx->flags |= EBPF_SAVE_RA;
gen_imm_to_reg(insn, MIPS_R_A1, ctx);
emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
if (insn->imm < 0) {
emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
} else {
emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
}
goto ld_skb_common;
case BPF_LD | BPF_B | BPF_IND:
case BPF_LD | BPF_H | BPF_IND:
case BPF_LD | BPF_W | BPF_IND:
case BPF_LD | BPF_DW | BPF_IND:
ctx->flags |= EBPF_SAVE_RA;
src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
if (src < 0)
return src;
ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
if (ts == REG_32BIT_ZERO_EX) {
/* sign extend */
emit_instr(ctx, sll, MIPS_R_A1, src, 0);
src = MIPS_R_A1;
}
if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
} else {
gen_imm_to_reg(insn, MIPS_R_AT, ctx);
emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
}
/* truncate to 32-bit int */
emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
ld_skb_common:
emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
/* delay slot move */
emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
/* Check the error value */
b_off = b_imm(exit_idx, ctx);
if (is_bad_offset(b_off)) {
target = j_target(ctx, exit_idx);
if (target == (unsigned int)-1)
return -E2BIG;
if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
ctx->offsets[this_idx] |= OFFSETS_B_CONV;
ctx->long_b_conversion = 1;
}
emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
emit_instr(ctx, nop);
emit_instr(ctx, j, target);
emit_instr(ctx, nop);
} else {
emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
emit_instr(ctx, nop);
}
#ifdef __BIG_ENDIAN
need_swap = false;
#else
need_swap = true;
#endif
dst = MIPS_R_V0;
switch (BPF_SIZE(insn->code)) {
case BPF_B:
emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
break;
case BPF_H:
emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
if (need_swap)
emit_instr(ctx, wsbh, dst, dst);
break;
case BPF_W:
emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
if (need_swap) {
emit_instr(ctx, wsbh, dst, dst);
emit_instr(ctx, rotr, dst, dst, 16);
}
break;
case BPF_DW:
emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
if (need_swap) {
emit_instr(ctx, dsbh, dst, dst);
emit_instr(ctx, dshd, dst, dst);
}
break;
}
break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
case BPF_ALU | BPF_END | BPF_FROM_LE:
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
......
......@@ -3,7 +3,7 @@
# Arch-specific network modules
#
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
else
obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
endif
......@@ -20,7 +20,7 @@
* with our redzone usage.
*
* [ prev sp ] <-------------
* [ nv gpr save area ] 8*8 |
* [ nv gpr save area ] 6*8 |
* [ tail_call_cnt ] 8 |
* [ local_tmp_var ] 8 |
* fp (r31) --> [ ebpf stack space ] upto 512 |
......@@ -28,8 +28,8 @@
* sp (r1) ---> [ stack pointer ] --------------
*/
/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
#define BPF_PPC_STACK_SAVE (8*8)
/* for gpr non volatile registers BPG_REG_6 to 10 */
#define BPF_PPC_STACK_SAVE (6*8)
/* for bpf JIT code internal usage */
#define BPF_PPC_STACK_LOCALS 16
/* stack frame excluding BPF stack, ensure this is quadword aligned */
......@@ -39,10 +39,8 @@
#ifndef __ASSEMBLY__
/* BPF register usage */
#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0)
#define SKB_DATA_REG (MAX_BPF_JIT_REG + 1)
#define TMP_REG_1 (MAX_BPF_JIT_REG + 2)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 3)
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
/* BPF to ppc register mappings */
static const int b2p[] = {
......@@ -63,40 +61,23 @@ static const int b2p[] = {
[BPF_REG_FP] = 31,
/* eBPF jit internal registers */
[BPF_REG_AX] = 2,
[SKB_HLEN_REG] = 25,
[SKB_DATA_REG] = 26,
[TMP_REG_1] = 9,
[TMP_REG_2] = 10
};
/* PPC NVR range -- update this if we ever use NVRs below r24 */
#define BPF_PPC_NVR_MIN 24
/* Assembly helpers */
#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \
u64 func##_negative_offset(u64 r3, u64 r4); \
u64 func##_positive_offset(u64 r3, u64 r4);
DECLARE_LOAD_FUNC(sk_load_word);
DECLARE_LOAD_FUNC(sk_load_half);
DECLARE_LOAD_FUNC(sk_load_byte);
#define CHOOSE_LOAD_FUNC(imm, func) \
(imm < 0 ? \
(imm >= SKF_LL_OFF ? func##_negative_offset : func) : \
func##_positive_offset)
/* PPC NVR range -- update this if we ever use NVRs below r27 */
#define BPF_PPC_NVR_MIN 27
#define SEEN_FUNC 0x1000 /* might call external helpers */
#define SEEN_STACK 0x2000 /* uses BPF stack */
#define SEEN_SKB 0x4000 /* uses sk_buff */
#define SEEN_TAILCALL 0x8000 /* uses tail calls */
#define SEEN_TAILCALL 0x4000 /* uses tail calls */
struct codegen_context {
/*
* This is used to track register usage as well
* as calls to external helpers.
* - register usage is tracked with corresponding
* bits (r3-r10 and r25-r31)
* bits (r3-r10 and r27-r31)
* - rest of the bits can be used to track other
* things -- for now, we use bits 16 to 23
* encoded in SEEN_* macros above
......
/*
* bpf_jit_asm64.S: Packet/header access helper functions
* for PPC64 BPF compiler.
*
* Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
* IBM Corporation
*
* Based on bpf_jit_asm.S by Matt Evans
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <asm/ppc_asm.h>
#include <asm/ptrace.h>
#include "bpf_jit64.h"
/*
* All of these routines are called directly from generated code,
* with the below register usage:
* r27 skb pointer (ctx)
* r25 skb header length
* r26 skb->data pointer
* r4 offset
*
* Result is passed back in:
* r8 data read in host endian format (accumulator)
*
* r9 is used as a temporary register
*/
#define r_skb r27
#define r_hlen r25
#define r_data r26
#define r_off r4
#define r_val r8
#define r_tmp r9
_GLOBAL_TOC(sk_load_word)
cmpdi r_off, 0
blt bpf_slow_path_word_neg
b sk_load_word_positive_offset
_GLOBAL_TOC(sk_load_word_positive_offset)
/* Are we accessing past headlen? */
subi r_tmp, r_hlen, 4
cmpd r_tmp, r_off
blt bpf_slow_path_word
/* Nope, just hitting the header. cr0 here is eq or gt! */
LWZX_BE r_val, r_data, r_off
blr /* Return success, cr0 != LT */
_GLOBAL_TOC(sk_load_half)
cmpdi r_off, 0
blt bpf_slow_path_half_neg
b sk_load_half_positive_offset
_GLOBAL_TOC(sk_load_half_positive_offset)
subi r_tmp, r_hlen, 2
cmpd r_tmp, r_off
blt bpf_slow_path_half
LHZX_BE r_val, r_data, r_off
blr
_GLOBAL_TOC(sk_load_byte)
cmpdi r_off, 0
blt bpf_slow_path_byte_neg
b sk_load_byte_positive_offset
_GLOBAL_TOC(sk_load_byte_positive_offset)
cmpd r_hlen, r_off
ble bpf_slow_path_byte
lbzx r_val, r_data, r_off
blr
/*
* Call out to skb_copy_bits:
* Allocate a new stack frame here to remain ABI-compliant in
* stashing LR.
*/
#define bpf_slow_path_common(SIZE) \
mflr r0; \
std r0, PPC_LR_STKOFF(r1); \
stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \
mr r3, r_skb; \
/* r4 = r_off as passed */ \
addi r5, r1, STACK_FRAME_MIN_SIZE; \
li r6, SIZE; \
bl skb_copy_bits; \
nop; \
/* save r5 */ \
addi r5, r1, STACK_FRAME_MIN_SIZE; \
/* r3 = 0 on success */ \
addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \
ld r0, PPC_LR_STKOFF(r1); \
mtlr r0; \
cmpdi r3, 0; \
blt bpf_error; /* cr0 = LT */
bpf_slow_path_word:
bpf_slow_path_common(4)
/* Data value is on stack, and cr0 != LT */
LWZX_BE r_val, 0, r5
blr
bpf_slow_path_half:
bpf_slow_path_common(2)
LHZX_BE r_val, 0, r5
blr
bpf_slow_path_byte:
bpf_slow_path_common(1)
lbzx r_val, 0, r5
blr
/*
* Call out to bpf_internal_load_pointer_neg_helper
*/
#define sk_negative_common(SIZE) \
mflr r0; \
std r0, PPC_LR_STKOFF(r1); \
stdu r1, -STACK_FRAME_MIN_SIZE(r1); \
mr r3, r_skb; \
/* r4 = r_off, as passed */ \
li r5, SIZE; \
bl bpf_internal_load_pointer_neg_helper; \
nop; \
addi r1, r1, STACK_FRAME_MIN_SIZE; \
ld r0, PPC_LR_STKOFF(r1); \
mtlr r0; \
/* R3 != 0 on success */ \
cmpldi r3, 0; \
beq bpf_error_slow; /* cr0 = EQ */
bpf_slow_path_word_neg:
lis r_tmp, -32 /* SKF_LL_OFF */
cmpd r_off, r_tmp /* addr < SKF_* */
blt bpf_error /* cr0 = LT */
b sk_load_word_negative_offset
_GLOBAL_TOC(sk_load_word_negative_offset)
sk_negative_common(4)
LWZX_BE r_val, 0, r3
blr
bpf_slow_path_half_neg:
lis r_tmp, -32 /* SKF_LL_OFF */
cmpd r_off, r_tmp /* addr < SKF_* */
blt bpf_error /* cr0 = LT */
b sk_load_half_negative_offset
_GLOBAL_TOC(sk_load_half_negative_offset)
sk_negative_common(2)
LHZX_BE r_val, 0, r3
blr
bpf_slow_path_byte_neg:
lis r_tmp, -32 /* SKF_LL_OFF */
cmpd r_off, r_tmp /* addr < SKF_* */
blt bpf_error /* cr0 = LT */
b sk_load_byte_negative_offset
_GLOBAL_TOC(sk_load_byte_negative_offset)
sk_negative_common(1)
lbzx r_val, 0, r3
blr
bpf_error_slow:
/* fabricate a cr0 = lt */
li r_tmp, -1
cmpdi r_tmp, 0
bpf_error:
/*
* Entered with cr0 = lt
* Generated code will 'blt epilogue', returning 0.
*/
li r_val, 0
blr
......@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
* [ prev sp ] <-------------
* [ ... ] |
* sp (r1) ---> [ stack pointer ] --------------
* [ nv gpr save area ] 8*8
* [ nv gpr save area ] 6*8
* [ tail_call_cnt ] 8
* [ local_tmp_var ] 8
* [ unused red zone ] 208 bytes protected
......@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
BUG();
}
static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
{
/*
* Load skb->len and skb->data_len
* r3 points to skb
*/
PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
/* header_len = len - data_len */
PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
/* skb->data pointer */
PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
}
static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
......@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
if (bpf_is_seen_register(ctx, i))
PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/*
* Save additional non-volatile regs if we cache skb
* Also, setup skb data
*/
if (ctx->seen & SEEN_SKB) {
PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
bpf_jit_emit_skb_loads(image, ctx);
}
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, BPF_REG_FP))
PPC_ADDI(b2p[BPF_REG_FP], 1,
......@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
if (bpf_is_seen_register(ctx, i))
PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/* Restore non-volatile registers used for skb cache */
if (ctx->seen & SEEN_SKB) {
PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
}
/* Tear down our stack frame */
if (bpf_has_stack_frame(ctx)) {
PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
......@@ -753,23 +718,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
ctx->seen |= SEEN_FUNC;
func = (u8 *) __bpf_call_base + imm;
/* Save skb pointer if we need to re-cache skb data */
if ((ctx->seen & SEEN_SKB) &&
bpf_helper_changes_pkt_data(func))
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_func_call(image, ctx, (u64)func);
/* move return value from r3 to BPF_REG_0 */
PPC_MR(b2p[BPF_REG_0], 3);
/* refresh skb cache */
if ((ctx->seen & SEEN_SKB) &&
bpf_helper_changes_pkt_data(func)) {
/* reload skb pointer to r3 */
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
}
break;
/*
......@@ -886,65 +838,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_BCC(true_cond, addrs[i + 1 + off]);
break;
/*
* Loads from packet header/data
* Assume 32-bit input value in imm and X (src_reg)
*/
/* Absolute loads */
case BPF_LD | BPF_W | BPF_ABS:
func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
goto common_load_abs;
case BPF_LD | BPF_H | BPF_ABS:
func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
goto common_load_abs;
case BPF_LD | BPF_B | BPF_ABS:
func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
common_load_abs:
/*
* Load from [imm]
* Load into r4, which can just be passed onto
* skb load helpers as the second parameter
*/
PPC_LI32(4, imm);
goto common_load;
/* Indirect loads */
case BPF_LD | BPF_W | BPF_IND:
func = (u8 *)sk_load_word;
goto common_load_ind;
case BPF_LD | BPF_H | BPF_IND:
func = (u8 *)sk_load_half;
goto common_load_ind;
case BPF_LD | BPF_B | BPF_IND:
func = (u8 *)sk_load_byte;
common_load_ind:
/*
* Load from [src_reg + imm]
* Treat src_reg as a 32-bit value
*/
PPC_EXTSW(4, src_reg);
if (imm) {
if (imm >= -32768 && imm < 32768)
PPC_ADDI(4, 4, IMM_L(imm));
else {
PPC_LI32(b2p[TMP_REG_1], imm);
PPC_ADD(4, 4, b2p[TMP_REG_1]);
}
}
common_load:
ctx->seen |= SEEN_SKB;
ctx->seen |= SEEN_FUNC;
bpf_jit_emit_func_call(image, ctx, (u64)func);
/*
* Helper returns 'lt' condition on error, and an
* appropriate return value in BPF_REG_0
*/
PPC_BCC(COND_LT, exit_addr);
break;
/*
* Tail call
*/
......
......@@ -2,4 +2,4 @@
#
# Arch-specific network modules
#
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
/* SPDX-License-Identifier: GPL-2.0 */
/*
* BPF Jit compiler for s390, help functions.
*
* Copyright IBM Corp. 2012,2015
*
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
#include <linux/linkage.h>
#include "bpf_jit.h"
/*
* Calling convention:
* registers %r7-%r10, %r11,%r13, and %r15 are call saved
*
* Input (64 bit):
* %r3 (%b2) = offset into skb data
* %r6 (%b5) = return address
* %r7 (%b6) = skb pointer
* %r12 = skb data pointer
*
* Output:
* %r14= %b0 = return value (read skb value)
*
* Work registers: %r2,%r4,%r5,%r14
*
* skb_copy_bits takes 4 parameters:
* %r2 = skb pointer
* %r3 = offset into skb data
* %r4 = pointer to temp buffer
* %r5 = length to copy
* Return value in %r2: 0 = ok
*
* bpf_internal_load_pointer_neg_helper takes 3 parameters:
* %r2 = skb pointer
* %r3 = offset into data
* %r4 = length to copy
* Return value in %r2: Pointer to data
*/
#define SKF_MAX_NEG_OFF -0x200000 /* SKF_LL_OFF from filter.h */
/*
* Load SIZE bytes from SKB
*/
#define sk_load_common(NAME, SIZE, LOAD) \
ENTRY(sk_load_##NAME); \
ltgr %r3,%r3; /* Is offset negative? */ \
jl sk_load_##NAME##_slow_neg; \
ENTRY(sk_load_##NAME##_pos); \
aghi %r3,SIZE; /* Offset + SIZE */ \
clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \
jh sk_load_##NAME##_slow; \
LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \
b OFF_OK(%r6); /* Return */ \
\
sk_load_##NAME##_slow:; \
lgr %r2,%r7; /* Arg1 = skb pointer */ \
aghi %r3,-SIZE; /* Arg2 = offset */ \
la %r4,STK_OFF_TMP(%r15); /* Arg3 = temp bufffer */ \
lghi %r5,SIZE; /* Arg4 = size */ \
brasl %r14,skb_copy_bits; /* Get data from skb */ \
LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \
ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \
br %r6; /* Return */
sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */
sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */
/*
* Load 1 byte from SKB (optimized version)
*/
/* r14 = *(u8 *) (skb->data+offset) */
ENTRY(sk_load_byte)
ltgr %r3,%r3 # Is offset negative?
jl sk_load_byte_slow_neg
ENTRY(sk_load_byte_pos)
clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen?
jnl sk_load_byte_slow
llgc %r14,0(%r3,%r12) # Get byte from skb
b OFF_OK(%r6) # Return OK
sk_load_byte_slow:
lgr %r2,%r7 # Arg1 = skb pointer
# Arg2 = offset
la %r4,STK_OFF_TMP(%r15) # Arg3 = pointer to temp buffer
lghi %r5,1 # Arg4 = size (1 byte)
brasl %r14,skb_copy_bits # Get data from skb
llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer
ltgr %r2,%r2 # Set cc to (%r2 != 0)
br %r6 # Return cc
#define sk_negative_common(NAME, SIZE, LOAD) \
sk_load_##NAME##_slow_neg:; \
cgfi %r3,SKF_MAX_NEG_OFF; \
jl bpf_error; \
lgr %r2,%r7; /* Arg1 = skb pointer */ \
/* Arg2 = offset */ \
lghi %r4,SIZE; /* Arg3 = size */ \
brasl %r14,bpf_internal_load_pointer_neg_helper; \
ltgr %r2,%r2; \
jz bpf_error; \
LOAD %r14,0(%r2); /* Get data from pointer */ \
xr %r3,%r3; /* Set cc to zero */ \
br %r6; /* Return cc */
sk_negative_common(word, 4, llgf)
sk_negative_common(half, 2, llgh)
sk_negative_common(byte, 1, llgc)
bpf_error:
# force a return 0 from jit handler
ltgr %r15,%r15 # Set condition code
br %r6
......@@ -16,9 +16,6 @@
#include <linux/filter.h>
#include <linux/types.h>
extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
#endif /* __ASSEMBLY__ */
/*
......@@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* | | |
* | BPF stack | |
* | | |
* +---------------+ |
* | 8 byte skbp | |
* R15+176 -> +---------------+ |
* | 8 byte hlen | |
* R15+168 -> +---------------+ |
* | 4 byte align | |
* +---------------+ |
* | 4 byte temp | |
* | for bpf_jit.S | |
* R15+160 -> +---------------+ |
* | new backchain | |
* R15+152 -> +---------------+ |
......@@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* The stack size used by the BPF program ("BPF stack" above) is passed
* via "aux->stack_depth".
*/
#define STK_SPACE_ADD (8 + 8 + 4 + 4 + 160)
#define STK_SPACE_ADD (160)
#define STK_160_UNUSED (160 - 12 * 8)
#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED)
#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
#define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */
#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
/* Offset to skip condition code check */
#define OFF_OK 4
#endif /* __ARCH_S390_NET_BPF_JIT_H */
......@@ -47,23 +47,21 @@ struct bpf_jit {
#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
#define SEEN_SKB 1 /* skb access */
#define SEEN_MEM 2 /* use mem[] for temporary storage */
#define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */
#define SEEN_LITERAL 8 /* code uses literals */
#define SEEN_FUNC 16 /* calls C functions */
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
#define SEEN_REG_AX 64 /* code uses constant blinding */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */
#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */
#define SEEN_LITERAL (1 << 2) /* code uses literals */
#define SEEN_FUNC (1 << 3) /* calls C functions */
#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */
#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
* s390 registers
*/
#define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */
#define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */
#define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */
#define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */
#define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */
#define REG_L (MAX_BPF_JIT_REG + 2) /* Literal pool register */
#define REG_15 (MAX_BPF_JIT_REG + 3) /* Register 15 */
#define REG_0 REG_W0 /* Register 0 */
#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
......@@ -88,10 +86,8 @@ static const int reg2hex[] = {
[BPF_REG_9] = 10,
/* BPF stack pointer */
[BPF_REG_FP] = 13,
/* Register for blinding (shared with REG_SKB_DATA) */
/* Register for blinding */
[BPF_REG_AX] = 12,
/* SKB data pointer */
[REG_SKB_DATA] = 12,
/* Work registers for s390x backend */
[REG_W0] = 0,
[REG_W1] = 1,
......@@ -384,27 +380,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
} while (re <= 15);
}
/*
* For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
* we store the SKB header length on the stack and the SKB data
* pointer in REG_SKB_DATA if BPF_REG_AX is not used.
*/
static void emit_load_skb_data_hlen(struct bpf_jit *jit)
{
/* Header length: llgf %w1,<len>(%b1) */
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
offsetof(struct sk_buff, len));
/* s %w1,<data_len>(%b1) */
EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
offsetof(struct sk_buff, data_len));
/* stg %w1,ST_OFF_HLEN(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
if (!(jit->seen & SEEN_REG_AX))
/* lg %skb_data,data_off(%b1) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
BPF_REG_1, offsetof(struct sk_buff, data));
}
/*
* Emit function prologue
*
......@@ -445,12 +420,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
if (jit->seen & SEEN_SKB) {
emit_load_skb_data_hlen(jit);
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
STK_OFF_SKBP);
}
}
/*
......@@ -483,12 +452,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
unsigned int func_addr, mask;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
unsigned int mask;
if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
jit->seen |= SEEN_REG_AX;
......@@ -970,13 +939,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
if ((jit->seen & SEEN_SKB) &&
bpf_helper_changes_pkt_data((void *)func)) {
/* lg %b1,ST_OFF_SKBP(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
REG_15, STK_OFF_SKBP);
emit_load_skb_data_hlen(jit);
}
break;
}
case BPF_JMP | BPF_TAIL_CALL:
......@@ -1176,73 +1138,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
break;
/*
* BPF_LD
*/
case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
func_addr = __pa(sk_load_byte_pos);
else
func_addr = __pa(sk_load_byte);
goto call_fn;
case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
func_addr = __pa(sk_load_half_pos);
else
func_addr = __pa(sk_load_half);
goto call_fn;
case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
func_addr = __pa(sk_load_word_pos);
else
func_addr = __pa(sk_load_word);
goto call_fn;
call_fn:
jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
REG_SET_SEEN(REG_14); /* Return address of possible func call */
/*
* Implicit input:
* BPF_REG_6 (R7) : skb pointer
* REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
*
* Calculated input:
* BPF_REG_2 (R3) : offset of byte(s) to fetch in skb
* BPF_REG_5 (R6) : return address
*
* Output:
* BPF_REG_0 (R14): data read from skb
*
* Scratch registers (BPF_REG_1-5)
*/
/* Call function: llilf %w1,func_addr */
EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
/* Offset: lgfi %b2,imm */
EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
if (BPF_MODE(insn->code) == BPF_IND)
/* agfr %b2,%src (%src is s32 here) */
EMIT4(0xb9180000, BPF_REG_2, src_reg);
/* Reload REG_SKB_DATA if BPF_REG_AX is used */
if (jit->seen & SEEN_REG_AX)
/* lg %skb_data,data_off(%b6) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
BPF_REG_6, offsetof(struct sk_buff, data));
/* basr %b5,%w1 (%b5 is call saved) */
EMIT2(0x0d00, BPF_REG_5, REG_W1);
/*
* Note: For fast access we jump directly after the
* jnz instruction from bpf_jit.S
*/
/* jnz <ret0> */
EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
break;
default: /* too complex, give up */
pr_err("Unknown opcode %02x\n", insn->code);
return -1;
......
#
# Arch-specific network modules
#
obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o
ifeq ($(BITS),32)
obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o
endif
......@@ -33,35 +33,6 @@
#define I5 0x1d
#define FP 0x1e
#define I7 0x1f
#define r_SKB L0
#define r_HEADLEN L4
#define r_SKB_DATA L5
#define r_TMP G1
#define r_TMP2 G3
/* assembly code in arch/sparc/net/bpf_jit_asm_64.S */
extern u32 bpf_jit_load_word[];
extern u32 bpf_jit_load_half[];
extern u32 bpf_jit_load_byte[];
extern u32 bpf_jit_load_byte_msh[];
extern u32 bpf_jit_load_word_positive_offset[];
extern u32 bpf_jit_load_half_positive_offset[];
extern u32 bpf_jit_load_byte_positive_offset[];
extern u32 bpf_jit_load_byte_msh_positive_offset[];
extern u32 bpf_jit_load_word_negative_offset[];
extern u32 bpf_jit_load_half_negative_offset[];
extern u32 bpf_jit_load_byte_negative_offset[];
extern u32 bpf_jit_load_byte_msh_negative_offset[];
#else
#define r_RESULT %o0
#define r_SKB %o0
#define r_OFF %o1
#define r_HEADLEN %l4
#define r_SKB_DATA %l5
#define r_TMP %g1
#define r_TMP2 %g3
#endif
#endif /* _BPF_JIT_H */
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/ptrace.h>
#include "bpf_jit_64.h"
#define SAVE_SZ 176
#define SCRATCH_OFF STACK_BIAS + 128
#define BE_PTR(label) be,pn %xcc, label
#define SIGN_EXTEND(reg) sra reg, 0, reg
#define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */
.text
.globl bpf_jit_load_word
bpf_jit_load_word:
cmp r_OFF, 0
bl bpf_slow_path_word_neg
nop
.globl bpf_jit_load_word_positive_offset
bpf_jit_load_word_positive_offset:
sub r_HEADLEN, r_OFF, r_TMP
cmp r_TMP, 3
ble bpf_slow_path_word
add r_SKB_DATA, r_OFF, r_TMP
andcc r_TMP, 3, %g0
bne load_word_unaligned
nop
retl
ld [r_TMP], r_RESULT
load_word_unaligned:
ldub [r_TMP + 0x0], r_OFF
ldub [r_TMP + 0x1], r_TMP2
sll r_OFF, 8, r_OFF
or r_OFF, r_TMP2, r_OFF
ldub [r_TMP + 0x2], r_TMP2
sll r_OFF, 8, r_OFF
or r_OFF, r_TMP2, r_OFF
ldub [r_TMP + 0x3], r_TMP2
sll r_OFF, 8, r_OFF
retl
or r_OFF, r_TMP2, r_RESULT
.globl bpf_jit_load_half
bpf_jit_load_half:
cmp r_OFF, 0
bl bpf_slow_path_half_neg
nop
.globl bpf_jit_load_half_positive_offset
bpf_jit_load_half_positive_offset:
sub r_HEADLEN, r_OFF, r_TMP
cmp r_TMP, 1
ble bpf_slow_path_half
add r_SKB_DATA, r_OFF, r_TMP
andcc r_TMP, 1, %g0
bne load_half_unaligned
nop
retl
lduh [r_TMP], r_RESULT
load_half_unaligned:
ldub [r_TMP + 0x0], r_OFF
ldub [r_TMP + 0x1], r_TMP2
sll r_OFF, 8, r_OFF
retl
or r_OFF, r_TMP2, r_RESULT
.globl bpf_jit_load_byte
bpf_jit_load_byte:
cmp r_OFF, 0
bl bpf_slow_path_byte_neg
nop
.globl bpf_jit_load_byte_positive_offset
bpf_jit_load_byte_positive_offset:
cmp r_OFF, r_HEADLEN
bge bpf_slow_path_byte
nop
retl
ldub [r_SKB_DATA + r_OFF], r_RESULT
#define bpf_slow_path_common(LEN) \
save %sp, -SAVE_SZ, %sp; \
mov %i0, %o0; \
mov %i1, %o1; \
add %fp, SCRATCH_OFF, %o2; \
call skb_copy_bits; \
mov (LEN), %o3; \
cmp %o0, 0; \
restore;
bpf_slow_path_word:
bpf_slow_path_common(4)
bl bpf_error
ld [%sp + SCRATCH_OFF], r_RESULT
retl
nop
bpf_slow_path_half:
bpf_slow_path_common(2)
bl bpf_error
lduh [%sp + SCRATCH_OFF], r_RESULT
retl
nop
bpf_slow_path_byte:
bpf_slow_path_common(1)
bl bpf_error
ldub [%sp + SCRATCH_OFF], r_RESULT
retl
nop
#define bpf_negative_common(LEN) \
save %sp, -SAVE_SZ, %sp; \
mov %i0, %o0; \
mov %i1, %o1; \
SIGN_EXTEND(%o1); \
call bpf_internal_load_pointer_neg_helper; \
mov (LEN), %o2; \
mov %o0, r_TMP; \
cmp %o0, 0; \
BE_PTR(bpf_error); \
restore;
bpf_slow_path_word_neg:
sethi %hi(SKF_MAX_NEG_OFF), r_TMP
cmp r_OFF, r_TMP
bl bpf_error
nop
.globl bpf_jit_load_word_negative_offset
bpf_jit_load_word_negative_offset:
bpf_negative_common(4)
andcc r_TMP, 3, %g0
bne load_word_unaligned
nop
retl
ld [r_TMP], r_RESULT
bpf_slow_path_half_neg:
sethi %hi(SKF_MAX_NEG_OFF), r_TMP
cmp r_OFF, r_TMP
bl bpf_error
nop
.globl bpf_jit_load_half_negative_offset
bpf_jit_load_half_negative_offset:
bpf_negative_common(2)
andcc r_TMP, 1, %g0
bne load_half_unaligned
nop
retl
lduh [r_TMP], r_RESULT
bpf_slow_path_byte_neg:
sethi %hi(SKF_MAX_NEG_OFF), r_TMP
cmp r_OFF, r_TMP
bl bpf_error
nop
.globl bpf_jit_load_byte_negative_offset
bpf_jit_load_byte_negative_offset:
bpf_negative_common(1)
retl
ldub [r_TMP], r_RESULT
bpf_error:
/* Make the JIT program itself return zero. */
ret
restore %g0, %g0, %o0
......@@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_)
}
}
#define SEEN_DATAREF 1 /* might call external helpers */
#define SEEN_XREG 2 /* ebx is used */
#define SEEN_MEM 4 /* use mem[] for temporary storage */
#define S13(X) ((X) & 0x1fff)
#define S5(X) ((X) & 0x1f)
#define IMMED 0x00002000
......@@ -198,7 +194,6 @@ struct jit_ctx {
bool tmp_1_used;
bool tmp_2_used;
bool tmp_3_used;
bool saw_ld_abs_ind;
bool saw_frame_pointer;
bool saw_call;
bool saw_tail_call;
......@@ -207,9 +202,7 @@ struct jit_ctx {
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 2)
#define SKB_DATA_REG (MAX_BPF_JIT_REG + 3)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 4)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 2)
/* Map BPF registers to SPARC registers */
static const int bpf2sparc[] = {
......@@ -238,9 +231,6 @@ static const int bpf2sparc[] = {
[TMP_REG_1] = G1,
[TMP_REG_2] = G2,
[TMP_REG_3] = G3,
[SKB_HLEN_REG] = L4,
[SKB_DATA_REG] = L5,
};
static void emit(const u32 insn, struct jit_ctx *ctx)
......@@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
return 0;
}
static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb)
{
const u8 r_headlen = bpf2sparc[SKB_HLEN_REG];
const u8 r_data = bpf2sparc[SKB_DATA_REG];
const u8 r_tmp = bpf2sparc[TMP_REG_1];
unsigned int off;
off = offsetof(struct sk_buff, len);
emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx);
off = offsetof(struct sk_buff, data_len);
emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx);
emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx);
off = offsetof(struct sk_buff, data);
emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx);
}
/* Just skip the save instruction and the ctx register move. */
#define BPF_TAILCALL_PROLOGUE_SKIP 16
#define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128)
......@@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx)
emit_reg_move(I0, O0, ctx);
/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
if (ctx->saw_ld_abs_ind)
load_skb_regs(ctx, bpf2sparc[BPF_REG_1]);
}
static void build_epilogue(struct jit_ctx *ctx)
......@@ -1225,16 +1193,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
u8 *func = ((u8 *)__bpf_call_base) + imm;
ctx->saw_call = true;
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
emit_call((u32 *)func, ctx);
emit_nop(ctx);
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
load_skb_regs(ctx, L7);
break;
}
......@@ -1412,43 +1375,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_nop(ctx);
break;
}
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
case BPF_LD | BPF_ABS | BPF_W:
func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word);
goto common_load;
case BPF_LD | BPF_ABS | BPF_H:
func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half);
goto common_load;
case BPF_LD | BPF_ABS | BPF_B:
func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte);
goto common_load;
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
case BPF_LD | BPF_IND | BPF_W:
func = bpf_jit_load_word;
goto common_load;
case BPF_LD | BPF_IND | BPF_H:
func = bpf_jit_load_half;
goto common_load;
case BPF_LD | BPF_IND | BPF_B:
func = bpf_jit_load_byte;
common_load:
ctx->saw_ld_abs_ind = true;
emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx);
emit_loadimm(imm, O1, ctx);
if (BPF_MODE(code) == BPF_IND)
emit_alu(ADD, src, O1, ctx);
emit_call(func, ctx);
emit_alu_K(SRA, O1, 0, ctx);
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
break;
default:
pr_err_once("unknown opcode %02x\n", code);
......@@ -1583,12 +1509,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
build_epilogue(&ctx);
if (bpf_jit_enable > 1)
pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass,
pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
image_size - (ctx.idx * 4),
ctx.tmp_1_used ? '1' : ' ',
ctx.tmp_2_used ? '2' : ' ',
ctx.tmp_3_used ? '3' : ' ',
ctx.saw_ld_abs_ind ? 'L' : ' ',
ctx.saw_frame_pointer ? 'F' : ' ',
ctx.saw_call ? 'C' : ' ',
ctx.saw_tail_call ? 'T' : ' ');
......
......@@ -138,7 +138,7 @@ config X86
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if X86_64
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EXIT_THREAD
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
......
......@@ -291,16 +291,20 @@ do { \
* lfence
* jmp spec_trap
* do_rop:
* mov %rax,(%rsp)
* mov %rax,(%rsp) for x86_64
* mov %edx,(%esp) for x86_32
* retq
*
* Without retpolines configured:
*
* jmp *%rax
* jmp *%rax for x86_64
* jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 17
# define RETPOLINE_RAX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
EMIT2(0xF3, 0x90); /* pause */ \
......@@ -308,11 +312,31 @@ do { \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
EMIT1(0xC3); /* retq */
EMIT1(0xC3); /* retq */ \
} while (0)
#else
# define RETPOLINE_EDX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* call do_rop */ \
/* spec_trap: */ \
EMIT2(0xF3, 0x90); /* pause */ \
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
EMIT1(0xC3); /* ret */ \
} while (0)
#endif
#else /* !CONFIG_RETPOLINE */
#ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 2
# define RETPOLINE_RAX_BPF_JIT() \
EMIT2(0xFF, 0xE0); /* jmp *%rax */
#else
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
#endif
#endif
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
#
# Arch-specific network modules
#
OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
ifeq ($(CONFIG_X86_32),y)
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
else
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
endif
/* bpf_jit.S : BPF JIT helper functions
*
* Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
/*
* Calling convention :
* rbx : skb pointer (callee saved)
* esi : offset of byte(s) to fetch in skb (can be scratched)
* r10 : copy of skb->data
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r10
#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
#define FUNC(name) \
.globl name; \
.type name, @function; \
name:
FUNC(sk_load_word)
test %esi,%esi
js bpf_slow_path_word_neg
FUNC(sk_load_word_positive_offset)
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
jle bpf_slow_path_word
mov (SKBDATA,%rsi),%eax
bswap %eax /* ntohl() */
ret
FUNC(sk_load_half)
test %esi,%esi
js bpf_slow_path_half_neg
FUNC(sk_load_half_positive_offset)
mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
jle bpf_slow_path_half
movzwl (SKBDATA,%rsi),%eax
rol $8,%ax # ntohs()
ret
FUNC(sk_load_byte)
test %esi,%esi
js bpf_slow_path_byte_neg
FUNC(sk_load_byte_positive_offset)
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
ret
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
lea 32(%rbp), %rdx;\
FRAME_BEGIN; \
mov %rbx, %rdi; /* arg1 == skb */ \
push %r9; \
push SKBDATA; \
/* rsi already has offset */ \
mov $LEN,%ecx; /* len */ \
call skb_copy_bits; \
test %eax,%eax; \
pop SKBDATA; \
pop %r9; \
FRAME_END
bpf_slow_path_word:
bpf_slow_path_common(4)
js bpf_error
mov 32(%rbp),%eax
bswap %eax
ret
bpf_slow_path_half:
bpf_slow_path_common(2)
js bpf_error
mov 32(%rbp),%ax
rol $8,%ax
movzwl %ax,%eax
ret
bpf_slow_path_byte:
bpf_slow_path_common(1)
js bpf_error
movzbl 32(%rbp),%eax
ret
#define sk_negative_common(SIZE) \
FRAME_BEGIN; \
mov %rbx, %rdi; /* arg1 == skb */ \
push %r9; \
push SKBDATA; \
/* rsi already has offset */ \
mov $SIZE,%edx; /* size */ \
call bpf_internal_load_pointer_neg_helper; \
test %rax,%rax; \
pop SKBDATA; \
pop %r9; \
FRAME_END; \
jz bpf_error
bpf_slow_path_word_neg:
cmp SKF_MAX_NEG_OFF, %esi /* test range */
jl bpf_error /* offset lower -> error */
FUNC(sk_load_word_negative_offset)
sk_negative_common(4)
mov (%rax), %eax
bswap %eax
ret
bpf_slow_path_half_neg:
cmp SKF_MAX_NEG_OFF, %esi
jl bpf_error
FUNC(sk_load_half_negative_offset)
sk_negative_common(2)
mov (%rax),%ax
rol $8,%ax
movzwl %ax,%eax
ret
bpf_slow_path_byte_neg:
cmp SKF_MAX_NEG_OFF, %esi
jl bpf_error
FUNC(sk_load_byte_negative_offset)
sk_negative_common(1)
movzbl (%rax), %eax
ret
bpf_error:
# force a return 0 from jit handler
xor %eax,%eax
mov (%rbp),%rbx
mov 8(%rbp),%r13
mov 16(%rbp),%r14
mov 24(%rbp),%r15
add $40, %rbp
leaveq
ret
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (C) 2017 Netronome Systems, Inc.
* Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -102,6 +102,15 @@ nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
return nfp_bpf_cmsg_alloc(bpf, size);
}
static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
{
struct cmsg_hdr *hdr;
hdr = (struct cmsg_hdr *)skb->data;
return hdr->type;
}
static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
{
struct cmsg_hdr *hdr;
......@@ -431,6 +440,11 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
goto err_free;
}
if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
nfp_bpf_event_output(bpf, skb);
return;
}
nfp_ctrl_lock(bpf->app->ctrl);
tag = nfp_bpf_cmsg_get_tag(skb);
......
/*
* Copyright (C) 2017 Netronome Systems, Inc.
* Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -37,6 +37,14 @@
#include <linux/bitops.h>
#include <linux/types.h>
/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking
* our FW ABI. In that case we will do translation in the driver.
*/
#define NFP_BPF_SCALAR_VALUE 1
#define NFP_BPF_MAP_VALUE 4
#define NFP_BPF_STACK 6
#define NFP_BPF_PACKET_DATA 8
enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_FUNC = 1,
NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
......@@ -81,6 +89,7 @@ enum nfp_bpf_cmsg_type {
CMSG_TYPE_MAP_DELETE = 5,
CMSG_TYPE_MAP_GETNEXT = 6,
CMSG_TYPE_MAP_GETFIRST = 7,
CMSG_TYPE_BPF_EVENT = 8,
__CMSG_TYPE_MAP_MAX,
};
......@@ -155,4 +164,13 @@ struct cmsg_reply_map_op {
__be32 resv;
struct cmsg_key_value_pair elem[0];
};
struct cmsg_bpf_event {
struct cmsg_hdr hdr;
__be32 cpu_id;
__be64 map_ptr;
__be32 data_size;
__be32 pkt_size;
u8 data[0];
};
#endif
/*
* Copyright (C) 2016-2017 Netronome Systems, Inc.
* Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -1395,15 +1395,9 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int
map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
struct bpf_offloaded_map *offmap;
struct nfp_bpf_map *nfp_map;
bool load_lm_ptr;
u32 ret_tgt;
s64 lm_off;
swreg tid;
offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
nfp_map = offmap->dev_priv;
/* We only have to reload LM0 if the key is not at start of stack */
lm_off = nfp_prog->stack_depth;
......@@ -1416,17 +1410,12 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
if (meta->func_id == BPF_FUNC_map_update_elem)
emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
/* Load map ID into a register, it should actually fit as an immediate
* but in case it doesn't deal with it here, not in the delay slots.
*/
tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
2, RELO_BR_HELPER);
ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
/* Load map ID into A0 */
wrp_mov(nfp_prog, reg_a(0), tid);
wrp_mov(nfp_prog, reg_a(0), reg_a(2));
/* Load the return address into B0 */
wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
......@@ -1456,6 +1445,31 @@ nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
static int
nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
swreg ptr_type;
u32 ret_tgt;
ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
2, RELO_BR_HELPER);
/* Load ptr type into A1 */
wrp_mov(nfp_prog, reg_a(1), ptr_type);
/* Load the return address into B0 */
wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
return -EINVAL;
return 0;
}
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
......@@ -2411,6 +2425,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return map_call_stack_common(nfp_prog, meta);
case BPF_FUNC_get_prandom_u32:
return nfp_get_prandom_u32(nfp_prog, meta);
case BPF_FUNC_perf_event_output:
return nfp_perf_event_output(nfp_prog, meta);
default:
WARN_ONCE(1, "verifier allowed unsupported function\n");
return -EOPNOTSUPP;
......@@ -3227,6 +3243,33 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
return 0;
}
static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
{
struct nfp_insn_meta *meta1, *meta2;
struct nfp_bpf_map *nfp_map;
struct bpf_map *map;
nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
if (meta1->skip || meta2->skip)
continue;
if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
continue;
map = (void *)(unsigned long)((u32)meta1->insn.imm |
(u64)meta2->insn.imm << 32);
if (bpf_map_offload_neutral(map))
continue;
nfp_map = map_to_offmap(map)->dev_priv;
meta1->insn.imm = nfp_map->tid;
meta2->insn.imm = 0;
}
return 0;
}
static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
{
__le64 *ustore = (__force __le64 *)prog;
......@@ -3263,6 +3306,10 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog)
{
int ret;
ret = nfp_bpf_replace_map_ptrs(nfp_prog);
if (ret)
return ret;
ret = nfp_bpf_optimize(nfp_prog);
if (ret)
return ret;
......@@ -3353,6 +3400,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
case BPF_FUNC_map_delete_elem:
val = nfp_prog->bpf->helpers.map_delete;
break;
case BPF_FUNC_perf_event_output:
val = nfp_prog->bpf->helpers.perf_event_output;
break;
default:
pr_err("relocation of unknown helper %d\n",
val);
......
/*
* Copyright (C) 2017 Netronome Systems, Inc.
* Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -43,6 +43,14 @@
#include "fw.h"
#include "main.h"
const struct rhashtable_params nfp_bpf_maps_neutral_params = {
.nelem_hint = 4,
.key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
.key_offset = offsetof(struct nfp_bpf_neutral_map, ptr),
.head_offset = offsetof(struct nfp_bpf_neutral_map, l),
.automatic_shrinking = true,
};
static bool nfp_net_ebpf_capable(struct nfp_net *nn)
{
#ifdef __LITTLE_ENDIAN
......@@ -290,6 +298,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
case BPF_FUNC_map_delete_elem:
bpf->helpers.map_delete = readl(&cap->func_addr);
break;
case BPF_FUNC_perf_event_output:
bpf->helpers.perf_event_output = readl(&cap->func_addr);
break;
}
return 0;
......@@ -401,17 +412,28 @@ static int nfp_bpf_init(struct nfp_app *app)
init_waitqueue_head(&bpf->cmsg_wq);
INIT_LIST_HEAD(&bpf->map_list);
err = nfp_bpf_parse_capabilities(app);
err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
if (err)
goto err_free_bpf;
err = nfp_bpf_parse_capabilities(app);
if (err)
goto err_free_neutral_maps;
return 0;
err_free_neutral_maps:
rhashtable_destroy(&bpf->maps_neutral);
err_free_bpf:
kfree(bpf);
return err;
}
static void nfp_check_rhashtable_empty(void *ptr, void *arg)
{
WARN_ON_ONCE(1);
}
static void nfp_bpf_clean(struct nfp_app *app)
{
struct nfp_app_bpf *bpf = app->priv;
......@@ -419,6 +441,8 @@ static void nfp_bpf_clean(struct nfp_app *app)
WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
WARN_ON(!list_empty(&bpf->map_list));
WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
rhashtable_free_and_destroy(&bpf->maps_neutral,
nfp_check_rhashtable_empty, NULL);
kfree(bpf);
}
......
/*
* Copyright (C) 2016-2017 Netronome Systems, Inc.
* Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -39,6 +39,7 @@
#include <linux/bpf_verifier.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rhashtable.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/wait.h>
......@@ -114,6 +115,8 @@ enum pkt_vec {
* @maps_in_use: number of currently offloaded maps
* @map_elems_in_use: number of elements allocated to offloaded maps
*
* @maps_neutral: hash table of offload-neutral maps (on pointer)
*
* @adjust_head: adjust head capability
* @adjust_head.flags: extra flags for adjust head
* @adjust_head.off_min: minimal packet offset within buffer required
......@@ -133,6 +136,7 @@ enum pkt_vec {
* @helpers.map_lookup: map lookup helper address
* @helpers.map_update: map update helper address
* @helpers.map_delete: map delete helper address
* @helpers.perf_event_output: output perf event to a ring buffer
*
* @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
*/
......@@ -150,6 +154,8 @@ struct nfp_app_bpf {
unsigned int maps_in_use;
unsigned int map_elems_in_use;
struct rhashtable maps_neutral;
struct nfp_bpf_cap_adjust_head {
u32 flags;
int off_min;
......@@ -171,6 +177,7 @@ struct nfp_app_bpf {
u32 map_lookup;
u32 map_update;
u32 map_delete;
u32 perf_event_output;
} helpers;
bool pseudo_random;
......@@ -199,6 +206,14 @@ struct nfp_bpf_map {
enum nfp_bpf_map_use use_map[];
};
struct nfp_bpf_neutral_map {
struct rhash_head l;
struct bpf_map *ptr;
u32 count;
};
extern const struct rhashtable_params nfp_bpf_maps_neutral_params;
struct nfp_prog;
struct nfp_insn_meta;
typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
......@@ -367,6 +382,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
* @error: error code if something went wrong
* @stack_depth: max stack depth from the verifier
* @adjust_head_location: if program has single adjust head call - the insn no.
* @map_records_cnt: the number of map pointers recorded for this prog
* @map_records: the map record pointers from bpf->maps_neutral
* @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
*/
struct nfp_prog {
......@@ -390,6 +407,9 @@ struct nfp_prog {
unsigned int stack_depth;
unsigned int adjust_head_location;
unsigned int map_records_cnt;
struct nfp_bpf_neutral_map **map_records;
struct list_head insns;
};
......@@ -440,5 +460,7 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
void *key, void *next_key);
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
#endif
/*
* Copyright (C) 2016-2017 Netronome Systems, Inc.
* Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -56,6 +56,126 @@
#include "../nfp_net_ctrl.h"
#include "../nfp_net.h"
static int
nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
struct bpf_map *map)
{
struct nfp_bpf_neutral_map *record;
int err;
/* Map record paths are entered via ndo, update side is protected. */
ASSERT_RTNL();
/* Reuse path - other offloaded program is already tracking this map. */
record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
nfp_bpf_maps_neutral_params);
if (record) {
nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
record->count++;
return 0;
}
/* Grab a single ref to the map for our record. The prog destroy ndo
* happens after free_used_maps().
*/
map = bpf_map_inc(map, false);
if (IS_ERR(map))
return PTR_ERR(map);
record = kmalloc(sizeof(*record), GFP_KERNEL);
if (!record) {
err = -ENOMEM;
goto err_map_put;
}
record->ptr = map;
record->count = 1;
err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
nfp_bpf_maps_neutral_params);
if (err)
goto err_free_rec;
nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
return 0;
err_free_rec:
kfree(record);
err_map_put:
bpf_map_put(map);
return err;
}
static void
nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog)
{
bool freed = false;
int i;
ASSERT_RTNL();
for (i = 0; i < nfp_prog->map_records_cnt; i++) {
if (--nfp_prog->map_records[i]->count) {
nfp_prog->map_records[i] = NULL;
continue;
}
WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral,
&nfp_prog->map_records[i]->l,
nfp_bpf_maps_neutral_params));
freed = true;
}
if (freed) {
synchronize_rcu();
for (i = 0; i < nfp_prog->map_records_cnt; i++)
if (nfp_prog->map_records[i]) {
bpf_map_put(nfp_prog->map_records[i]->ptr);
kfree(nfp_prog->map_records[i]);
}
}
kfree(nfp_prog->map_records);
nfp_prog->map_records = NULL;
nfp_prog->map_records_cnt = 0;
}
static int
nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
struct bpf_prog *prog)
{
int i, cnt, err;
/* Quickly count the maps we will have to remember */
cnt = 0;
for (i = 0; i < prog->aux->used_map_cnt; i++)
if (bpf_map_offload_neutral(prog->aux->used_maps[i]))
cnt++;
if (!cnt)
return 0;
nfp_prog->map_records = kmalloc_array(cnt,
sizeof(nfp_prog->map_records[0]),
GFP_KERNEL);
if (!nfp_prog->map_records)
return -ENOMEM;
for (i = 0; i < prog->aux->used_map_cnt; i++)
if (bpf_map_offload_neutral(prog->aux->used_maps[i])) {
err = nfp_map_ptr_record(bpf, nfp_prog,
prog->aux->used_maps[i]);
if (err) {
nfp_map_ptrs_forget(bpf, nfp_prog);
return err;
}
}
WARN_ON(cnt != nfp_prog->map_records_cnt);
return 0;
}
static int
nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
unsigned int cnt)
......@@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64);
prog->aux->offload->jited_image = nfp_prog->prog;
return 0;
return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
}
static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
......@@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
kvfree(nfp_prog->prog);
nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
nfp_prog_free(nfp_prog);
return 0;
......@@ -320,6 +441,53 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
}
}
static unsigned long
nfp_bpf_perf_event_copy(void *dst, const void *src,
unsigned long off, unsigned long len)
{
memcpy(dst, src + off, len);
return 0;
}
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
{
struct cmsg_bpf_event *cbe = (void *)skb->data;
u32 pkt_size, data_size;
struct bpf_map *map;
if (skb->len < sizeof(struct cmsg_bpf_event))
goto err_drop;
pkt_size = be32_to_cpu(cbe->pkt_size);
data_size = be32_to_cpu(cbe->data_size);
map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
goto err_drop;
if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
goto err_drop;
rcu_read_lock();
if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
nfp_bpf_maps_neutral_params)) {
rcu_read_unlock();
pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
map);
goto err_drop;
}
bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
&cbe->data[round_up(pkt_size, 4)], data_size,
cbe->data, pkt_size, nfp_bpf_perf_event_copy);
rcu_read_unlock();
dev_consume_skb_any(skb);
return 0;
err_drop:
dev_kfree_skb_any(skb);
return -EINVAL;
}
static int
nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
......
/*
* Copyright (C) 2016-2017 Netronome Systems, Inc.
* Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -36,6 +36,8 @@
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
#include "../nfp_app.h"
#include "../nfp_main.h"
#include "fw.h"
#include "main.h"
......@@ -149,15 +151,6 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
return false;
}
/* Rest of the checks is only if we re-parse the same insn */
if (!meta->func_id)
return true;
if (meta->arg1.map_ptr != reg1->map_ptr) {
pr_vlog(env, "%s: called for different map\n", fname);
return false;
}
return true;
}
......@@ -216,6 +209,71 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
return -EOPNOTSUPP;
case BPF_FUNC_perf_event_output:
BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE ||
NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE ||
NFP_BPF_STACK != PTR_TO_STACK ||
NFP_BPF_PACKET_DATA != PTR_TO_PACKET);
if (!bpf->helpers.perf_event_output) {
pr_vlog(env, "event_output: not supported by FW\n");
return -EOPNOTSUPP;
}
/* Force current CPU to make sure we can report the event
* wherever we get the control message from FW.
*/
if (reg3->var_off.mask & BPF_F_INDEX_MASK ||
(reg3->var_off.value & BPF_F_INDEX_MASK) !=
BPF_F_CURRENT_CPU) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off);
pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n",
tn_buf);
return -EOPNOTSUPP;
}
/* Save space in meta, we don't care about arguments other
* than 4th meta, shove it into arg1.
*/
reg1 = cur_regs(env) + BPF_REG_4;
if (reg1->type != SCALAR_VALUE /* NULL ptr */ &&
reg1->type != PTR_TO_STACK &&
reg1->type != PTR_TO_MAP_VALUE &&
reg1->type != PTR_TO_PACKET) {
pr_vlog(env, "event_output: unsupported ptr type: %d\n",
reg1->type);
return -EOPNOTSUPP;
}
if (reg1->type == PTR_TO_STACK &&
!nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL))
return -EOPNOTSUPP;
/* Warn user that on offload NFP may return success even if map
* is not going to accept the event, since the event output is
* fully async and device won't know the state of the map.
* There is also FW limitation on the event length.
*
* Lost events will not show up on the perf ring, driver
* won't see them at all. Events may also get reordered.
*/
dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev,
"bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n");
pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n");
if (!meta->func_id)
break;
if (reg1->type != meta->arg1.type) {
pr_vlog(env, "event_output: ptr type changed: %d %d\n",
meta->arg1.type, reg1->type);
return -EINVAL;
}
break;
default:
pr_vlog(env, "unsupported function id: %d\n", func_id);
return -EOPNOTSUPP;
......
/*
* Copyright (C) 2017 Netronome Systems, Inc.
* Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......
......@@ -110,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
return container_of(map, struct bpf_offloaded_map, map);
}
static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
{
return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
}
static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
{
return map->ops->map_seq_show_elem && map->ops->map_check_btf;
......@@ -235,6 +240,8 @@ struct bpf_verifier_ops {
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
int (*gen_ld_abs)(const struct bpf_insn *orig,
struct bpf_insn *insn_buf);
u32 (*convert_ctx_access)(enum bpf_access_type type,
const struct bpf_insn *src,
struct bpf_insn *dst,
......@@ -676,6 +683,31 @@ static inline int sock_map_prog(struct bpf_map *map,
}
#endif
#if defined(CONFIG_XDP_SOCKETS)
struct xdp_sock;
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs);
void __xsk_map_flush(struct bpf_map *map);
#else
struct xdp_sock;
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
u32 key)
{
return NULL;
}
static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs)
{
return -EOPNOTSUPP;
}
static inline void __xsk_map_flush(struct bpf_map *map)
{
}
#endif
/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
......@@ -689,9 +721,8 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
/* Shared helpers among cBPF and eBPF. */
......
......@@ -2,7 +2,6 @@
#ifndef __LINUX_BPF_TRACE_H__
#define __LINUX_BPF_TRACE_H__
#include <trace/events/bpf.h>
#include <trace/events/xdp.h>
#endif /* __LINUX_BPF_TRACE_H__ */
......@@ -49,4 +49,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
#endif
......@@ -173,6 +173,11 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
#define BPF_MAX_SUBPROGS 256
struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
};
/* single container for all structs
* one verifier_env per bpf_check() call
*/
......@@ -191,9 +196,7 @@ struct bpf_verifier_env {
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
struct bpf_verifier_log log;
u32 subprog_starts[BPF_MAX_SUBPROGS];
/* computes the stack depth of each bpf function */
u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
u32 subprog_cnt;
};
......
......@@ -47,7 +47,9 @@ struct xdp_buff;
/* Additional register mappings for converted user programs. */
#define BPF_REG_A BPF_REG_0
#define BPF_REG_X BPF_REG_7
#define BPF_REG_TMP BPF_REG_8
#define BPF_REG_TMP BPF_REG_2 /* scratch reg */
#define BPF_REG_D BPF_REG_8 /* data, callee-saved */
#define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */
/* Kernel hidden auxiliary/helper register for hardening step.
* Only used by eBPF JITs. It's nothing more than a temporary
......@@ -468,7 +470,8 @@ struct bpf_prog {
dst_needed:1, /* Do we need dst entry? */
blinded:1, /* Was blinded */
is_func:1, /* program is a bpf function */
kprobe_override:1; /* Do we override a kprobe? */
kprobe_override:1, /* Do we override a kprobe? */
has_callchain_buf:1; /* callchain buffer allocated? */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
......@@ -759,7 +762,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
* This does not appear to be a real limitation for existing software.
*/
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct bpf_prog *prog);
struct xdp_buff *xdp, struct bpf_prog *prog);
int xdp_do_redirect(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *prog);
......
......@@ -2510,6 +2510,7 @@ void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
int dev_queue_xmit(struct sk_buff *skb);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
int register_netdevice(struct net_device *dev);
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
void unregister_netdevice_many(struct list_head *head);
......
......@@ -207,8 +207,9 @@ struct ucred {
* PF_SMC protocol family that
* reuses AF_INET address family
*/
#define AF_XDP 44 /* XDP sockets */
#define AF_MAX 44 /* For now.. */
#define AF_MAX 45 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
......@@ -257,6 +258,7 @@ struct ucred {
#define PF_KCM AF_KCM
#define PF_QIPCRTR AF_QIPCRTR
#define PF_SMC AF_SMC
#define PF_XDP AF_XDP
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
......@@ -338,6 +340,7 @@ struct ucred {
#define SOL_NFC 280
#define SOL_KCM 281
#define SOL_TLS 282
#define SOL_XDP 283
/* IPX options */
#define IPX_TYPE 1
......
......@@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max);
/* Arithmetic and logical ops */
/* Shift a tnum left (by a fixed shift) */
struct tnum tnum_lshift(struct tnum a, u8 shift);
/* Shift a tnum right (by a fixed shift) */
/* Shift (rsh) a tnum right (by a fixed shift) */
struct tnum tnum_rshift(struct tnum a, u8 shift);
/* Shift (arsh) a tnum right (by a fixed min_shift) */
struct tnum tnum_arshift(struct tnum a, u8 min_shift);
/* Add two tnums, return @a + @b */
struct tnum tnum_add(struct tnum a, struct tnum b);
/* Subtract two tnums, return @a - @b */
......
......@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
}
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index);
......
/* SPDX-License-Identifier: GPL-2.0
* AF_XDP internal functions
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _LINUX_XDP_SOCK_H
#define _LINUX_XDP_SOCK_H
#include <linux/mutex.h>
#include <net/sock.h>
struct net_device;
struct xsk_queue;
struct xdp_umem;
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock */
struct sock sk;
struct xsk_queue *rx;
struct net_device *dev;
struct xdp_umem *umem;
struct list_head flush_node;
u16 queue_id;
struct xsk_queue *tx ____cacheline_aligned_in_smp;
/* Protects multiple processes in the control path */
struct mutex mutex;
u64 rx_dropped;
};
struct xdp_buff;
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
void xsk_flush(struct xdp_sock *xs);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -ENOTSUPP;
}
static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -ENOTSUPP;
}
static inline void xsk_flush(struct xdp_sock *xs)
{
}
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return false;
}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM bpf
#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_BPF_H
/* These are only used within the BPF_SYSCALL code */
#ifdef CONFIG_BPF_SYSCALL
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/fs.h>
#include <linux/tracepoint.h>
#define __PROG_TYPE_MAP(FN) \
FN(SOCKET_FILTER) \
FN(KPROBE) \
FN(SCHED_CLS) \
FN(SCHED_ACT) \
FN(TRACEPOINT) \
FN(XDP) \
FN(PERF_EVENT) \
FN(CGROUP_SKB) \
FN(CGROUP_SOCK) \
FN(LWT_IN) \
FN(LWT_OUT) \
FN(LWT_XMIT)
#define __MAP_TYPE_MAP(FN) \
FN(HASH) \
FN(ARRAY) \
FN(PROG_ARRAY) \
FN(PERF_EVENT_ARRAY) \
FN(PERCPU_HASH) \
FN(PERCPU_ARRAY) \
FN(STACK_TRACE) \
FN(CGROUP_ARRAY) \
FN(LRU_HASH) \
FN(LRU_PERCPU_HASH) \
FN(LPM_TRIE)
#define __PROG_TYPE_TP_FN(x) \
TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x);
#define __PROG_TYPE_SYM_FN(x) \
{ BPF_PROG_TYPE_##x, #x },
#define __PROG_TYPE_SYM_TAB \
__PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 }
__PROG_TYPE_MAP(__PROG_TYPE_TP_FN)
#define __MAP_TYPE_TP_FN(x) \
TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x);
#define __MAP_TYPE_SYM_FN(x) \
{ BPF_MAP_TYPE_##x, #x },
#define __MAP_TYPE_SYM_TAB \
__MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 }
__MAP_TYPE_MAP(__MAP_TYPE_TP_FN)
DECLARE_EVENT_CLASS(bpf_prog_event,
TP_PROTO(const struct bpf_prog *prg),
TP_ARGS(prg),
TP_STRUCT__entry(
__array(u8, prog_tag, 8)
__field(u32, type)
),
TP_fast_assign(
BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
__entry->type = prg->type;
),
TP_printk("prog=%s type=%s",
__print_hex_str(__entry->prog_tag, 8),
__print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB))
);
DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type,
TP_PROTO(const struct bpf_prog *prg),
TP_ARGS(prg)
);
DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu,
TP_PROTO(const struct bpf_prog *prg),
TP_ARGS(prg)
);
TRACE_EVENT(bpf_prog_load,
TP_PROTO(const struct bpf_prog *prg, int ufd),
TP_ARGS(prg, ufd),
TP_STRUCT__entry(
__array(u8, prog_tag, 8)
__field(u32, type)
__field(int, ufd)
),
TP_fast_assign(
BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
__entry->type = prg->type;
__entry->ufd = ufd;
),
TP_printk("prog=%s type=%s ufd=%d",
__print_hex_str(__entry->prog_tag, 8),
__print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB),
__entry->ufd)
);
TRACE_EVENT(bpf_map_create,
TP_PROTO(const struct bpf_map *map, int ufd),
TP_ARGS(map, ufd),
TP_STRUCT__entry(
__field(u32, type)
__field(u32, size_key)
__field(u32, size_value)
__field(u32, max_entries)
__field(u32, flags)
__field(int, ufd)
),
TP_fast_assign(
__entry->type = map->map_type;
__entry->size_key = map->key_size;
__entry->size_value = map->value_size;
__entry->max_entries = map->max_entries;
__entry->flags = map->map_flags;
__entry->ufd = ufd;
),
TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x",
__print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
__entry->ufd, __entry->size_key, __entry->size_value,
__entry->max_entries, __entry->flags)
);
DECLARE_EVENT_CLASS(bpf_obj_prog,
TP_PROTO(const struct bpf_prog *prg, int ufd,
const struct filename *pname),
TP_ARGS(prg, ufd, pname),
TP_STRUCT__entry(
__array(u8, prog_tag, 8)
__field(int, ufd)
__string(path, pname->name)
),
TP_fast_assign(
BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
__assign_str(path, pname->name);
__entry->ufd = ufd;
),
TP_printk("prog=%s path=%s ufd=%d",
__print_hex_str(__entry->prog_tag, 8),
__get_str(path), __entry->ufd)
);
DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog,
TP_PROTO(const struct bpf_prog *prg, int ufd,
const struct filename *pname),
TP_ARGS(prg, ufd, pname)
);
DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog,
TP_PROTO(const struct bpf_prog *prg, int ufd,
const struct filename *pname),
TP_ARGS(prg, ufd, pname)
);
DECLARE_EVENT_CLASS(bpf_obj_map,
TP_PROTO(const struct bpf_map *map, int ufd,
const struct filename *pname),
TP_ARGS(map, ufd, pname),
TP_STRUCT__entry(
__field(u32, type)
__field(int, ufd)
__string(path, pname->name)
),
TP_fast_assign(
__assign_str(path, pname->name);
__entry->type = map->map_type;
__entry->ufd = ufd;
),
TP_printk("map type=%s ufd=%d path=%s",
__print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
__entry->ufd, __get_str(path))
);
DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map,
TP_PROTO(const struct bpf_map *map, int ufd,
const struct filename *pname),
TP_ARGS(map, ufd, pname)
);
DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map,
TP_PROTO(const struct bpf_map *map, int ufd,
const struct filename *pname),
TP_ARGS(map, ufd, pname)
);
DECLARE_EVENT_CLASS(bpf_map_keyval,
TP_PROTO(const struct bpf_map *map, int ufd,
const void *key, const void *val),
TP_ARGS(map, ufd, key, val),
TP_STRUCT__entry(
__field(u32, type)
__field(u32, key_len)
__dynamic_array(u8, key, map->key_size)
__field(bool, key_trunc)
__field(u32, val_len)
__dynamic_array(u8, val, map->value_size)
__field(bool, val_trunc)
__field(int, ufd)
),
TP_fast_assign(
memcpy(__get_dynamic_array(key), key, map->key_size);
memcpy(__get_dynamic_array(val), val, map->value_size);
__entry->type = map->map_type;
__entry->key_len = min(map->key_size, 16U);
__entry->key_trunc = map->key_size != __entry->key_len;
__entry->val_len = min(map->value_size, 16U);
__entry->val_trunc = map->value_size != __entry->val_len;
__entry->ufd = ufd;
),
TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]",
__print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
__entry->ufd,
__print_hex(__get_dynamic_array(key), __entry->key_len),
__entry->key_trunc ? " ..." : "",
__print_hex(__get_dynamic_array(val), __entry->val_len),
__entry->val_trunc ? " ..." : "")
);
DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem,
TP_PROTO(const struct bpf_map *map, int ufd,
const void *key, const void *val),
TP_ARGS(map, ufd, key, val)
);
DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem,
TP_PROTO(const struct bpf_map *map, int ufd,
const void *key, const void *val),
TP_ARGS(map, ufd, key, val)
);
TRACE_EVENT(bpf_map_delete_elem,
TP_PROTO(const struct bpf_map *map, int ufd,
const void *key),
TP_ARGS(map, ufd, key),
TP_STRUCT__entry(
__field(u32, type)
__field(u32, key_len)
__dynamic_array(u8, key, map->key_size)
__field(bool, key_trunc)
__field(int, ufd)
),
TP_fast_assign(
memcpy(__get_dynamic_array(key), key, map->key_size);
__entry->type = map->map_type;
__entry->key_len = min(map->key_size, 16U);
__entry->key_trunc = map->key_size != __entry->key_len;
__entry->ufd = ufd;
),
TP_printk("map type=%s ufd=%d key=[%s%s]",
__print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
__entry->ufd,
__print_hex(__get_dynamic_array(key), __entry->key_len),
__entry->key_trunc ? " ..." : "")
);
TRACE_EVENT(bpf_map_next_key,
TP_PROTO(const struct bpf_map *map, int ufd,
const void *key, const void *key_next),
TP_ARGS(map, ufd, key, key_next),
TP_STRUCT__entry(
__field(u32, type)
__field(u32, key_len)
__dynamic_array(u8, key, map->key_size)
__dynamic_array(u8, nxt, map->key_size)
__field(bool, key_trunc)
__field(bool, key_null)
__field(int, ufd)
),
TP_fast_assign(
if (key)
memcpy(__get_dynamic_array(key), key, map->key_size);
__entry->key_null = !key;
memcpy(__get_dynamic_array(nxt), key_next, map->key_size);
__entry->type = map->map_type;
__entry->key_len = min(map->key_size, 16U);
__entry->key_trunc = map->key_size != __entry->key_len;
__entry->ufd = ufd;
),
TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
__print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
__entry->ufd,
__entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key),
__entry->key_len),
__entry->key_trunc && !__entry->key_null ? " ..." : "",
__print_hex(__get_dynamic_array(nxt), __entry->key_len),
__entry->key_trunc ? " ..." : "")
);
#endif /* CONFIG_BPF_SYSCALL */
#endif /* _TRACE_BPF_H */
#include <trace/define_trace.h>
......@@ -116,6 +116,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
};
enum bpf_prog_type {
......@@ -828,12 +829,12 @@ union bpf_attr {
*
* Also, be aware that the newer helper
* **bpf_perf_event_read_value**\ () is recommended over
* **bpf_perf_event_read*\ () in general. The latter has some ABI
* **bpf_perf_event_read**\ () in general. The latter has some ABI
* quirks where error and counter value are used as a return code
* (which is wrong to do since ranges may overlap). This issue is
* fixed with bpf_perf_event_read_value(), which at the same time
* provides more features over the **bpf_perf_event_read**\ ()
* interface. Please refer to the description of
* fixed with **bpf_perf_event_read_value**\ (), which at the same
* time provides more features over the **bpf_perf_event_read**\
* () interface. Please refer to the description of
* **bpf_perf_event_read_value**\ () for details.
* Return
* The value of the perf event counter read from the map, or a
......@@ -1361,7 +1362,7 @@ union bpf_attr {
* Return
* 0
*
* int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
* int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
* Description
* Emulate a call to **setsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
......@@ -1435,7 +1436,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
* int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
* int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a *map* referencing sockets. The
* *skops* is used as a new value for the entry associated to
......@@ -1533,7 +1534,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
* int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
* For en eBPF program attached to a perf event, retrieve the
* value of the event counter associated to *ctx* and store it in
......@@ -1544,7 +1545,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
* int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
* Description
* Emulate a call to **getsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
......@@ -1588,7 +1589,7 @@ union bpf_attr {
* Return
* 0
*
* int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
* int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
* Description
* Attempt to set the value of the **bpf_sock_ops_cb_flags** field
* for the full TCP socket associated to *bpf_sock_ops* to
......@@ -1721,7 +1722,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
* int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
* Description
* Bind the socket associated to *ctx* to the address pointed by
* *addr*, of length *addr_len*. This allows for making outgoing
......@@ -1767,6 +1768,64 @@ union bpf_attr {
* **CONFIG_XFRM** configuration option.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *ctx*, which is a pointer
* to the context on which the tracing program is executed.
* To store the stacktrace, the bpf program provides *buf* with
* a nonnegative *size*.
*
* The last argument, *flags*, holds the number of stack frames to
* skip (from 0 to 255), masked with
* **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
* the following flags:
*
* **BPF_F_USER_STACK**
* Collect a user space stack instead of a kernel stack.
* **BPF_F_USER_BUILD_ID**
* Collect buildid+offset instead of ips for user stack,
* only valid if **BPF_F_USER_STACK** is also specified.
*
* **bpf_get_stack**\ () can collect up to
* **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
* to sufficient large buffer size. Note that
* this limit can be controlled with the **sysctl** program, and
* that it should be manually increased in order to profile long
* user stacks (such as stacks for Java programs). To do so, use:
*
* ::
*
* # sysctl kernel.perf_event_max_stack=<new value>
*
* Return
* a non-negative value equal to or less than size on success, or
* a negative error in case of failure.
*
* int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
* Description
* This helper is similar to **bpf_skb_load_bytes**\ () in that
* it provides an easy way to load *len* bytes from *offset*
* from the packet associated to *skb*, into the buffer pointed
* by *to*. The difference to **bpf_skb_load_bytes**\ () is that
* a fifth argument *start_header* exists in order to select a
* base offset to start from. *start_header* can be one of:
*
* **BPF_HDR_START_MAC**
* Base offset to load data from is *skb*'s mac header.
* **BPF_HDR_START_NET**
* Base offset to load data from is *skb*'s network header.
*
* In general, "direct packet access" is the preferred method to
* access packet data, however, this helper is in particular useful
* in socket filters where *skb*\ **->data** does not always point
* to the start of the mac header and where "direct packet access"
* is not available.
*
* Return
* 0 on success, or a negative error in case of failure.
*
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -1835,7 +1894,9 @@ union bpf_attr {
FN(msg_pull_data), \
FN(bind), \
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state),
FN(skb_get_xfrm_state), \
FN(get_stack), \
FN(skb_load_bytes_relative),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -1869,11 +1930,14 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
/* BPF_FUNC_get_stackid flags. */
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
#define BPF_F_SKIP_FIELD_MASK 0xffULL
#define BPF_F_USER_STACK (1ULL << 8)
/* flags used by BPF_FUNC_get_stackid only. */
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
#define BPF_F_REUSE_STACKID (1ULL << 10)
/* flags used by BPF_FUNC_get_stack only. */
#define BPF_F_USER_BUILD_ID (1ULL << 11)
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
......@@ -1893,6 +1957,12 @@ enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
};
/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
enum bpf_hdr_start_off {
BPF_HDR_START_MAC,
BPF_HDR_START_NET,
};
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
*
* if_xdp: XDP socket user-space interface
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* Author(s): Björn Töpel <bjorn.topel@intel.com>
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
#ifndef _LINUX_IF_XDP_H
#define _LINUX_IF_XDP_H
#include <linux/types.h>
/* Options for the sxdp_flags field */
#define XDP_SHARED_UMEM 1
struct sockaddr_xdp {
__u16 sxdp_family;
__u32 sxdp_ifindex;
__u32 sxdp_queue_id;
__u32 sxdp_shared_umem_fd;
__u16 sxdp_flags;
};
/* XDP socket options */
#define XDP_RX_RING 1
#define XDP_TX_RING 2
#define XDP_UMEM_REG 3
#define XDP_UMEM_FILL_RING 4
#define XDP_UMEM_COMPLETION_RING 5
#define XDP_STATISTICS 6
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
__u64 len; /* Length of packet data area */
__u32 frame_size; /* Frame size */
__u32 frame_headroom; /* Frame head room */
};
struct xdp_statistics {
__u64 rx_dropped; /* Dropped for reasons other than invalid desc */
__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
};
/* Pgoff for mmaping the rings */
#define XDP_PGOFF_RX_RING 0
#define XDP_PGOFF_TX_RING 0x80000000
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
struct xdp_desc {
__u32 idx;
__u32 len;
__u16 offset;
__u8 flags;
__u8 padding[5];
};
struct xdp_ring {
__u32 producer __attribute__((aligned(64)));
__u32 consumer __attribute__((aligned(64)));
};
/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
struct xdp_ring ptrs;
struct xdp_desc desc[0] __attribute__((aligned(64)));
};
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
__u32 desc[0] __attribute__((aligned(64)));
};
#endif /* _LINUX_IF_XDP_H */
......@@ -8,6 +8,9 @@ obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
ifeq ($(CONFIG_XDP_SOCKETS),y)
obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
endif
obj-$(CONFIG_BPF_SYSCALL) += offload.o
ifeq ($(CONFIG_STREAM_PARSER),y)
ifeq ($(CONFIG_INET),y)
......
......@@ -31,6 +31,7 @@
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
#include <asm/unaligned.h>
......@@ -633,23 +634,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
break;
case BPF_LD | BPF_ABS | BPF_W:
case BPF_LD | BPF_ABS | BPF_H:
case BPF_LD | BPF_ABS | BPF_B:
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
break;
case BPF_LD | BPF_IND | BPF_W:
case BPF_LD | BPF_IND | BPF_H:
case BPF_LD | BPF_IND | BPF_B:
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
*to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
break;
case BPF_LD | BPF_IMM | BPF_DW:
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
......@@ -890,14 +874,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
INSN_3(LDX, MEM, W), \
INSN_3(LDX, MEM, DW), \
/* Immediate based. */ \
INSN_3(LD, IMM, DW), \
/* Misc (old cBPF carry-over). */ \
INSN_3(LD, ABS, B), \
INSN_3(LD, ABS, H), \
INSN_3(LD, ABS, W), \
INSN_3(LD, IND, B), \
INSN_3(LD, IND, H), \
INSN_3(LD, IND, W)
INSN_3(LD, IMM, DW)
bool bpf_opcode_in_insntable(u8 code)
{
......@@ -907,6 +884,13 @@ bool bpf_opcode_in_insntable(u8 code)
[0 ... 255] = false,
/* Now overwrite non-defaults ... */
BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
/* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
[BPF_LD | BPF_ABS | BPF_B] = true,
[BPF_LD | BPF_ABS | BPF_H] = true,
[BPF_LD | BPF_ABS | BPF_W] = true,
[BPF_LD | BPF_IND | BPF_B] = true,
[BPF_LD | BPF_IND | BPF_H] = true,
[BPF_LD | BPF_IND | BPF_W] = true,
};
#undef BPF_INSN_3_TBL
#undef BPF_INSN_2_TBL
......@@ -937,8 +921,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
#undef BPF_INSN_3_LBL
#undef BPF_INSN_2_LBL
u32 tail_call_cnt = 0;
void *ptr;
int off;
#define CONT ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })
......@@ -1265,67 +1247,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
(DST + insn->off));
CONT;
LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
off = IMM;
load_word:
/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
* appearing in the programs where ctx == skb
* (see may_access_skb() in the verifier). All programs
* keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
* bpf_convert_filter() saves it in BPF_R6, internal BPF
* verifier will check that BPF_R6 == ctx.
*
* BPF_ABS and BPF_IND are wrappers of function calls,
* so they scratch BPF_R1-BPF_R5 registers, preserve
* BPF_R6-BPF_R9, and store return value into BPF_R0.
*
* Implicit input:
* ctx == skb == BPF_R6 == CTX
*
* Explicit input:
* SRC == any register
* IMM == 32-bit immediate
*
* Output:
* BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
*/
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be32(ptr);
CONT;
}
return 0;
LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
off = IMM;
load_half:
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be16(ptr);
CONT;
}
return 0;
LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
off = IMM;
load_byte:
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = *(u8 *)ptr;
CONT;
}
return 0;
LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
off = IMM + SRC;
goto load_word;
LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
off = IMM + SRC;
goto load_half;
LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
off = IMM + SRC;
goto load_byte;
default_label:
/* If we ever reach this, we have a bug somewhere. Die hard here
......@@ -1722,6 +1643,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
aux = container_of(work, struct bpf_prog_aux, work);
if (bpf_prog_is_dev_bound(aux))
bpf_prog_offload_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
if (aux->prog->has_callchain_buf)
put_callchain_buffers();
#endif
for (i = 0; i < aux->func_cnt; i++)
bpf_jit_free(aux->func[i]);
if (aux->func_cnt) {
......@@ -1794,6 +1719,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
{
return -ENOTSUPP;
}
EXPORT_SYMBOL_GPL(bpf_event_output);
/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
......@@ -1840,9 +1766,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
#include <linux/bpf_trace.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
/* These are only used within the BPF_SYSCALL code */
#ifdef CONFIG_BPF_SYSCALL
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
#endif
......@@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
ret = bpf_obj_do_pin(pname, raw, type);
if (ret != 0)
bpf_any_put(raw, type);
if ((trace_bpf_obj_pin_prog_enabled() ||
trace_bpf_obj_pin_map_enabled()) && !ret) {
if (type == BPF_TYPE_PROG)
trace_bpf_obj_pin_prog(raw, ufd, pname);
if (type == BPF_TYPE_MAP)
trace_bpf_obj_pin_map(raw, ufd, pname);
}
out:
putname(pname);
return ret;
......@@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
else
goto out;
if (ret < 0) {
if (ret < 0)
bpf_any_put(raw, type);
} else if (trace_bpf_obj_get_prog_enabled() ||
trace_bpf_obj_get_map_enabled()) {
if (type == BPF_TYPE_PROG)
trace_bpf_obj_get_prog(raw, ret, pname);
if (type == BPF_TYPE_MAP)
trace_bpf_obj_get_map(raw, ret, pname);
}
out:
putname(pname);
return ret;
......
/*
* Copyright (C) 2017 Netronome Systems, Inc.
* Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
......@@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
struct bpf_prog_offload *offload;
bool ret;
if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
if (!bpf_prog_is_dev_bound(prog->aux))
return false;
if (!bpf_map_is_dev_bound(map))
return bpf_map_offload_neutral(map);
down_read(&bpf_devs_lock);
offload = prog->aux->offload;
......
......@@ -262,16 +262,11 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
return ret;
}
static void stack_map_get_build_id_offset(struct bpf_map *map,
struct stack_map_bucket *bucket,
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
u64 *ips, u32 trace_nr, bool user)
{
int i;
struct vm_area_struct *vma;
struct bpf_stack_build_id *id_offs;
bucket->nr = trace_nr;
id_offs = (struct bpf_stack_build_id *)bucket->data;
/*
* We cannot do up_read() in nmi context, so build_id lookup is
......@@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
pcpu_freelist_pop(&smap->freelist);
if (unlikely(!new_bucket))
return -ENOMEM;
stack_map_get_build_id_offset(map, new_bucket, ips,
trace_nr, user);
new_bucket->nr = trace_nr;
stack_map_get_build_id_offset(
(struct bpf_stack_build_id *)new_bucket->data,
ips, trace_nr, user);
trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
if (hash_matches && bucket->nr == trace_nr &&
memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
......@@ -405,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
struct perf_callchain_entry *trace;
bool kernel = !user;
int err = -EINVAL;
u64 *ips;
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_USER_BUILD_ID)))
goto clear;
if (kernel && user_build_id)
goto clear;
elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
: sizeof(u64);
if (unlikely(size % elem_size))
goto clear;
num_elem = size / elem_size;
if (sysctl_perf_event_max_stack < num_elem)
init_nr = 0;
else
init_nr = sysctl_perf_event_max_stack - num_elem;
trace = get_perf_callchain(regs, init_nr, kernel, user,
sysctl_perf_event_max_stack, false, false);
if (unlikely(!trace))
goto err_fault;
trace_nr = trace->nr - init_nr;
if (trace_nr < skip)
goto err_fault;
trace_nr -= skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;
ips = trace->ip + skip + init_nr;
if (user && user_build_id)
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
else
memcpy(buf, ips, copy_len);
if (size > copy_len)
memset(buf + copy_len, 0, size - copy_len);
return copy_len;
err_fault:
err = -EFAULT;
clear:
memset(buf, 0, size);
return err;
}
const struct bpf_func_proto bpf_get_stack_proto = {
.func = bpf_get_stack,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
......
......@@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map)
{
__bpf_map_put(map, true);
}
EXPORT_SYMBOL_GPL(bpf_map_put);
void bpf_map_put_with_uref(struct bpf_map *map)
{
......@@ -503,7 +504,6 @@ static int map_create(union bpf_attr *attr)
return err;
}
trace_bpf_map_create(map, err);
return err;
free_map:
......@@ -544,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
atomic_inc(&map->usercnt);
return map;
}
EXPORT_SYMBOL_GPL(bpf_map_inc);
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
......@@ -663,7 +664,6 @@ static int map_lookup_elem(union bpf_attr *attr)
if (copy_to_user(uvalue, value, value_size) != 0)
goto free_value;
trace_bpf_map_lookup_elem(map, ufd, key, value);
err = 0;
free_value:
......@@ -760,8 +760,6 @@ static int map_update_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
out:
if (!err)
trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
kfree(value);
free_key:
......@@ -814,8 +812,6 @@ static int map_delete_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
out:
if (!err)
trace_bpf_map_delete_elem(map, ufd, key);
kfree(key);
err_put:
fdput(f);
......@@ -879,7 +875,6 @@ static int map_get_next_key(union bpf_attr *attr)
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
goto free_next_key;
trace_bpf_map_next_key(map, ufd, key, next_key);
err = 0;
free_next_key:
......@@ -1027,7 +1022,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
if (atomic_dec_and_test(&prog->aux->refcnt)) {
int i;
trace_bpf_prog_put_rcu(prog);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
......@@ -1194,11 +1188,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
bool attach_drv)
{
struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);
if (!IS_ERR(prog))
trace_bpf_prog_get_type(prog);
return prog;
return __bpf_prog_get(ufd, &type, attach_drv);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
......@@ -1373,7 +1363,6 @@ static int bpf_prog_load(union bpf_attr *attr)
}
bpf_prog_kallsyms_add(prog);
trace_bpf_prog_load(prog, err);
return err;
free_used_maps:
......
......@@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
return TNUM(a.value >> shift, a.mask >> shift);
}
struct tnum tnum_arshift(struct tnum a, u8 min_shift)
{
/* if a.value is negative, arithmetic shifting by minimum shift
* will have larger negative offset compared to more shifting.
* If a.value is nonnegative, arithmetic shifting by minimum shift
* will have larger positive offset compare to more shifting.
*/
return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
}
struct tnum tnum_add(struct tnum a, struct tnum b)
{
u64 sm, sv, sigma, chi, mu;
......
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/bpf.h>
#include <linux/capability.h>
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>
struct xsk_map {
struct bpf_map map;
struct xdp_sock **xsk_map;
struct list_head __percpu *flush_list;
};
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
int cpu, err = -EINVAL;
struct xsk_map *m;
u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 ||
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
return ERR_PTR(-EINVAL);
m = kzalloc(sizeof(*m), GFP_USER);
if (!m)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&m->map, attr);
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
cost += sizeof(struct list_head) * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
goto free_m;
m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* Notice returns -EPERM on if map size is larger than memlock limit */
err = bpf_map_precharge_memlock(m->map.pages);
if (err)
goto free_m;
err = -ENOMEM;
m->flush_list = alloc_percpu(struct list_head);
if (!m->flush_list)
goto free_m;
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
sizeof(struct xdp_sock *),
m->map.numa_node);
if (!m->xsk_map)
goto free_percpu;
return &m->map;
free_percpu:
free_percpu(m->flush_list);
free_m:
kfree(m);
return ERR_PTR(err);
}
static void xsk_map_free(struct bpf_map *map)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
int i;
synchronize_net();
for (i = 0; i < map->max_entries; i++) {
struct xdp_sock *xs;
xs = m->xsk_map[i];
if (!xs)
continue;
sock_put((struct sock *)xs);
}
free_percpu(m->flush_list);
bpf_map_area_free(m->xsk_map);
kfree(m);
}
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
u32 index = key ? *(u32 *)key : U32_MAX;
u32 *next = next_key;
if (index >= m->map.max_entries) {
*next = 0;
return 0;
}
if (index == m->map.max_entries - 1)
return -ENOENT;
*next = index + 1;
return 0;
}
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock *xs;
if (key >= map->max_entries)
return NULL;
xs = READ_ONCE(m->xsk_map[key]);
return xs;
}
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
int err;
err = xsk_rcv(xs, xdp);
if (err)
return err;
if (!xs->flush_node.prev)
list_add(&xs->flush_node, flush_list);
return 0;
}
void __xsk_map_flush(struct bpf_map *map)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
xsk_flush(xs);
__list_del(xs->flush_node.prev, xs->flush_node.next);
xs->flush_node.prev = NULL;
}
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
return NULL;
}
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
u32 i = *(u32 *)key, fd = *(u32 *)value;
struct xdp_sock *xs, *old_xs;
struct socket *sock;
int err;
if (unlikely(map_flags > BPF_EXIST))
return -EINVAL;
if (unlikely(i >= m->map.max_entries))
return -E2BIG;
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;
sock = sockfd_lookup(fd, &err);
if (!sock)
return err;
if (sock->sk->sk_family != PF_XDP) {
sockfd_put(sock);
return -EOPNOTSUPP;
}
xs = (struct xdp_sock *)sock->sk;
if (!xsk_is_setup_for_bpf_map(xs)) {
sockfd_put(sock);
return -EOPNOTSUPP;
}
sock_hold(sock->sk);
old_xs = xchg(&m->xsk_map[i], xs);
if (old_xs) {
/* Make sure we've flushed everything. */
synchronize_net();
sock_put((struct sock *)old_xs);
}
sockfd_put(sock);
return 0;
}
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock *old_xs;
int k = *(u32 *)key;
if (k >= map->max_entries)
return -EINVAL;
old_xs = xchg(&m->xsk_map[k], NULL);
if (old_xs) {
/* Make sure we've flushed everything. */
synchronize_net();
sock_put((struct sock *)old_xs);
}
return 0;
}
const struct bpf_map_ops xsk_map_ops = {
.map_alloc = xsk_map_alloc,
.map_free = xsk_map_free,
.map_get_next_key = xsk_map_get_next_key,
.map_lookup_elem = xsk_map_lookup_elem,
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
};
......@@ -20,6 +20,7 @@
#include "trace.h"
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
/**
* trace_call_bpf - invoke BPF program
......@@ -474,8 +475,6 @@ BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct cgroup *cgrp;
if (unlikely(in_interrupt()))
return -EINVAL;
if (unlikely(idx >= array->map.max_entries))
return -E2BIG;
......@@ -577,6 +576,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_perf_event_output_proto;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
......@@ -664,6 +665,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
u64, flags)
{
struct pt_regs *regs = *(struct pt_regs **)tp_buff;
return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
(unsigned long) size, flags, 0);
}
static const struct bpf_func_proto bpf_get_stack_proto_tp = {
.func = bpf_get_stack_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
......@@ -672,6 +692,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_perf_event_output_proto_tp;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto_tp;
default:
return tracing_func_proto(func_id, prog);
}
......@@ -734,6 +756,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_perf_event_output_proto_tp;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto_tp;
case BPF_FUNC_perf_prog_read_value:
return &bpf_perf_prog_read_value_proto;
default:
......@@ -744,7 +768,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
/*
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
* to avoid potential recursive reuse issue when/if tracepoints are added
* inside bpf_*_event_output and/or bpf_get_stack_id
* inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
*/
static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
......@@ -787,6 +811,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
void *, buf, u32, size, u64, flags)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
perf_fetch_caller_regs(regs);
return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
(unsigned long) size, flags, 0);
}
static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
.func = bpf_get_stack_raw_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
......@@ -795,6 +839,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_perf_event_output_proto_raw_tp;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_raw_tp;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto_raw_tp;
default:
return tracing_func_proto(func_id, prog);
}
......
This diff is collapsed.
......@@ -59,6 +59,7 @@ source "net/tls/Kconfig"
source "net/xfrm/Kconfig"
source "net/iucv/Kconfig"
source "net/smc/Kconfig"
source "net/xdp/Kconfig"
config INET
bool "TCP/IP networking"
......
......@@ -85,3 +85,4 @@ obj-y += l3mdev/
endif
obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
This diff is collapsed.
This diff is collapsed.
......@@ -226,7 +226,8 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX"
x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
x "AF_MAX"
static const char *const af_family_key_strings[AF_MAX+1] = {
_sock_locks("sk_lock-")
......@@ -262,7 +263,8 @@ static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
"rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
"rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
"rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
"rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX"
"rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_XDP" ,
"rlock-AF_MAX"
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
"wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
......@@ -279,7 +281,8 @@ static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
"wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
"wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
"wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
"wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX"
"wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_XDP" ,
"wlock-AF_MAX"
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
"elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
......@@ -296,7 +299,8 @@ static const char *const af_family_elock_key_strings[AF_MAX+1] = {
"elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
"elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
"elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
"elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX"
"elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_XDP" ,
"elock-AF_MAX"
};
/*
......
......@@ -308,11 +308,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
void xdp_return_frame(struct xdp_frame *xdpf)
static void xdp_return(void *data, struct xdp_mem_info *mem)
{
struct xdp_mem_info *mem = &xdpf->mem;
struct xdp_mem_allocator *xa;
void *data = xdpf->data;
struct page *page;
switch (mem->type) {
......@@ -339,4 +337,15 @@ void xdp_return_frame(struct xdp_frame *xdpf)
break;
}
}
void xdp_return_frame(struct xdp_frame *xdpf)
{
xdp_return(xdpf->data, &xdpf->mem);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_buff(struct xdp_buff *xdp)
{
xdp_return(xdp->data, &xdp->rxq->mem);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
This diff is collapsed.
config XDP_SOCKETS
bool "XDP sockets"
depends on BPF_SYSCALL
default n
help
XDP sockets allows a channel between XDP programs and
userspace applications.
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
This diff is collapsed.
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0
* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef XDP_UMEM_PROPS_H_
#define XDP_UMEM_PROPS_H_
struct xdp_umem_props {
u32 frame_size;
u32 nframes;
};
#endif /* XDP_UMEM_PROPS_H_ */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment