Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Minor conflict, a CHECK was placed into an if() statement in net-next, whilst a newline was added to that CHECK call in 'net'. Thanks to Daniel for the merge resolution. Signed-off-by: David S. Miller <davem@davemloft.net>

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Minor conflict, a CHECK was placed into an if() statement in net-next, whilst a newline was added to that CHECK call in 'net'. Thanks to Daniel for the merge resolution. Signed-off-by: David S. Miller <davem@davemloft.net>
01adc485 · David S. Miller · 18b338f5 · e94fa1d9 · 01adc485 · 01adc485
Commit 01adc485 authored May 07, 2018 by David S. Miller
107 changed files
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -483,6 +483,12 @@ Example output from dmesg:
 [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00
 [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3

+When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1 and
+setting any other value than that will result in failure. This is even the case for
+setting bpf_jit_enable to 2, since dumping the final JIT image into the kernel log
+is discouraged and introspection through bpftool (under tools/bpf/bpftool/) is the
+generally recommended approach instead.
+
 In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for
 generating disassembly out of the kernel log's hexdump:


--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -6,6 +6,7 @@ Contents:
 .. toctree::
   :maxdepth: 2

+   af_xdp
   batman-adv
   can
   dpaa2/index

--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -45,6 +45,7 @@ through bpf(2) and passing a verifier in the kernel, a JIT will then
 translate these BPF proglets into native CPU instructions. There are
 two flavors of JITs, the newer eBPF JIT currently supported on:
  - x86_64
+  - x86_32
  - arm64
  - arm32
  - ppc64

--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2729,7 +2729,6 @@ F:	Documentation/networking/filter.txt
 F:	Documentation/bpf/
 F:	include/linux/bpf*
 F:	include/linux/filter.h
-F:	include/trace/events/bpf.h
 F:	include/trace/events/xdp.h
 F:	include/uapi/linux/bpf*
 F:	include/uapi/linux/filter.h
@@ -15408,6 +15407,14 @@ T:	git git://linuxtv.org/media_tree.git
 S:	Maintained
 F:	drivers/media/tuners/tuner-xc2028.*

+XDP SOCKETS (AF_XDP)
+M:	Björn Töpel <bjorn.topel@intel.com>
+M:	Magnus Karlsson <magnus.karlsson@intel.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	kernel/bpf/xskmap.c
+F:	net/xdp/
+
 XEN BLOCK SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 M:	Roger Pau Monné <roger.pau@citrix.com>

--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1452,83 +1452,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
 		emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
 		break;
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-	case BPF_LD | BPF_ABS | BPF_W:
-	case BPF_LD | BPF_ABS | BPF_H:
-	case BPF_LD | BPF_ABS | BPF_B:
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-	case BPF_LD | BPF_IND | BPF_W:
-	case BPF_LD | BPF_IND | BPF_H:
-	case BPF_LD | BPF_IND | BPF_B:
-	{
-		const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
-		const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
-						     /* rtn value */
-		const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
-		const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
-		const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
-		const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
-		int size;
-
-		/* Setting up first argument */
-		emit(ARM_MOV_R(r0, r4), ctx);
-
-		/* Setting up second argument */
-		emit_a32_mov_i(r1, imm, false, ctx);
-		if (BPF_MODE(code) == BPF_IND)
-			emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
-				       false, false, BPF_ADD);
-
-		/* Setting up third argument */
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			size = 4;
-			break;
-		case BPF_H:
-			size = 2;
-			break;
-		case BPF_B:
-			size = 1;
-			break;
-		default:
-			return -EINVAL;
-		}
-		emit_a32_mov_i(r2, size, false, ctx);
-
-		/* Setting up fourth argument */
-		emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
-
-		/* Setting up function pointer to call */
-		emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
-		emit_blx_r(r6, ctx);
-
-		emit(ARM_EOR_R(r1, r1, r1), ctx);
-		/* Check if return address is NULL or not.
-		 * if NULL then jump to epilogue
-		 * else continue to load the value from retn address
-		 */
-		emit(ARM_CMP_I(r0, 0), ctx);
-		jmp_offset = epilogue_offset(ctx);
-		check_imm24(jmp_offset);
-		_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
-
-		/* Load value from the address */
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			emit(ARM_LDR_I(r0, r0, 0), ctx);
-			emit_rev32(r0, r0, ctx);
-			break;
-		case BPF_H:
-			emit(ARM_LDRH_I(r0, r0, 0), ctx);
-			emit_rev16(r0, r0, ctx);
-			break;
-		case BPF_B:
-			emit(ARM_LDRB_I(r0, r0, 0), ctx);
-			/* No need to reverse */
-			break;
-		}
-		break;
-	}
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_H:

--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -723,71 +723,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
 		break;

-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-	case BPF_LD | BPF_ABS | BPF_W:
-	case BPF_LD | BPF_ABS | BPF_H:
-	case BPF_LD | BPF_ABS | BPF_B:
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-	case BPF_LD | BPF_IND | BPF_W:
-	case BPF_LD | BPF_IND | BPF_H:
-	case BPF_LD | BPF_IND | BPF_B:
-	{
-		const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
-		const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
-		const u8 fp = bpf2a64[BPF_REG_FP];
-		const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
-		const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
-		const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
-		const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
-		const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
-		int size;
-
-		emit(A64_MOV(1, r1, r6), ctx);
-		emit_a64_mov_i(0, r2, imm, ctx);
-		if (BPF_MODE(code) == BPF_IND)
-			emit(A64_ADD(0, r2, r2, src), ctx);
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			size = 4;
-			break;
-		case BPF_H:
-			size = 2;
-			break;
-		case BPF_B:
-			size = 1;
-			break;
-		default:
-			return -EINVAL;
-		}
-		emit_a64_mov_i64(r3, size, ctx);
-		emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
-		emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
-		emit(A64_BLR(r5), ctx);
-		emit(A64_MOV(1, r0, A64_R(0)), ctx);
-
-		jmp_offset = epilogue_offset(ctx);
-		check_imm19(jmp_offset);
-		emit(A64_CBZ(1, r0, jmp_offset), ctx);
-		emit(A64_MOV(1, r5, r0), ctx);
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			emit(A64_LDR32(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
-			emit(A64_REV32(0, r0, r0), ctx);
-#endif
-			break;
-		case BPF_H:
-			emit(A64_LDRH(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
-			emit(A64_REV16(0, r0, r0), ctx);
-#endif
-			break;
-		case BPF_B:
-			emit(A64_LDRB(r0, r5, A64_ZR), ctx);
-			break;
-		}
-		break;
-	}
 	default:
 		pr_err_once("unknown opcode %02x\n", code);
 		return -EINVAL;

--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1267,110 +1267,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			return -EINVAL;
 		break;

-	case BPF_LD | BPF_B | BPF_ABS:
-	case BPF_LD | BPF_H | BPF_ABS:
-	case BPF_LD | BPF_W | BPF_ABS:
-	case BPF_LD | BPF_DW | BPF_ABS:
-		ctx->flags |= EBPF_SAVE_RA;
-
-		gen_imm_to_reg(insn, MIPS_R_A1, ctx);
-		emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
-
-		if (insn->imm < 0) {
-			emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
-		} else {
-			emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
-			emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
-		}
-		goto ld_skb_common;
-
-	case BPF_LD | BPF_B | BPF_IND:
-	case BPF_LD | BPF_H | BPF_IND:
-	case BPF_LD | BPF_W | BPF_IND:
-	case BPF_LD | BPF_DW | BPF_IND:
-		ctx->flags |= EBPF_SAVE_RA;
-		src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
-		if (src < 0)
-			return src;
-		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
-		if (ts == REG_32BIT_ZERO_EX) {
-			/* sign extend */
-			emit_instr(ctx, sll, MIPS_R_A1, src, 0);
-			src = MIPS_R_A1;
-		}
-		if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
-			emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
-		} else {
-			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-			emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
-		}
-		/* truncate to 32-bit int */
-		emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
-		emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
-		emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
-
-		emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
-		emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
-		emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
-		emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
-
-ld_skb_common:
-		emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
-		/* delay slot move */
-		emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
-
-		/* Check the error value */
-		b_off = b_imm(exit_idx, ctx);
-		if (is_bad_offset(b_off)) {
-			target = j_target(ctx, exit_idx);
-			if (target == (unsigned int)-1)
-				return -E2BIG;
-
-			if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
-				ctx->offsets[this_idx] |= OFFSETS_B_CONV;
-				ctx->long_b_conversion = 1;
-			}
-			emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
-			emit_instr(ctx, nop);
-			emit_instr(ctx, j, target);
-			emit_instr(ctx, nop);
-		} else {
-			emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
-			emit_instr(ctx, nop);
-		}
-
-#ifdef __BIG_ENDIAN
-		need_swap = false;
-#else
-		need_swap = true;
-#endif
-		dst = MIPS_R_V0;
-		switch (BPF_SIZE(insn->code)) {
-		case BPF_B:
-			emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
-			break;
-		case BPF_H:
-			emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
-			if (need_swap)
-				emit_instr(ctx, wsbh, dst, dst);
-			break;
-		case BPF_W:
-			emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
-			if (need_swap) {
-				emit_instr(ctx, wsbh, dst, dst);
-				emit_instr(ctx, rotr, dst, dst, 16);
-			}
-			break;
-		case BPF_DW:
-			emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
-			if (need_swap) {
-				emit_instr(ctx, dsbh, dst, dst);
-				emit_instr(ctx, dshd, dst, dst);
-			}
-			break;
-		}
-
-		break;
 	case BPF_ALU | BPF_END | BPF_FROM_BE:
 	case BPF_ALU | BPF_END | BPF_FROM_LE:
 		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);

--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -3,7 +3,7 @@
 # Arch-specific network modules
 #
 ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
 else
 obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
 endif
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -20,7 +20,7 @@
 * with our redzone usage.
 *
 *		[	prev sp		] <-------------
- *		[   nv gpr save area	] 8*8		|
+ *		[   nv gpr save area	] 6*8		|
 *		[    tail_call_cnt	] 8		|
 *		[    local_tmp_var	] 8		|
 * fp (r31) -->	[   ebpf stack space	] upto 512	|
@@ -28,8 +28,8 @@
 * sp (r1) --->	[    stack pointer	] --------------
 */

-/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
-#define BPF_PPC_STACK_SAVE	(8*8)
+/* for gpr non volatile registers BPF_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE	(6*8)
 /* for bpf JIT code internal usage */
 #define BPF_PPC_STACK_LOCALS	16
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -39,10 +39,8 @@
 #ifndef __ASSEMBLY__

 /* BPF register usage */
-#define SKB_HLEN_REG	(MAX_BPF_JIT_REG + 0)
-#define SKB_DATA_REG	(MAX_BPF_JIT_REG + 1)
-#define TMP_REG_1	(MAX_BPF_JIT_REG + 2)
-#define TMP_REG_2	(MAX_BPF_JIT_REG + 3)
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)

 /* BPF to ppc register mappings */
 static const int b2p[] = {
@@ -63,40 +61,23 @@ static const int b2p[] = {
 	[BPF_REG_FP] = 31,
 	/* eBPF jit internal registers */
 	[BPF_REG_AX] = 2,
-	[SKB_HLEN_REG] = 25,
-	[SKB_DATA_REG] = 26,
 	[TMP_REG_1] = 9,
 	[TMP_REG_2] = 10
 };

-/* PPC NVR range -- update this if we ever use NVRs below r24 */
-#define BPF_PPC_NVR_MIN		24
-
-/* Assembly helpers */
-#define DECLARE_LOAD_FUNC(func)	u64 func(u64 r3, u64 r4);			\
-				u64 func##_negative_offset(u64 r3, u64 r4);	\
-				u64 func##_positive_offset(u64 r3, u64 r4);
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#define CHOOSE_LOAD_FUNC(imm, func)						\
-			(imm < 0 ?						\
-			(imm >= SKF_LL_OFF ? func##_negative_offset : func) :	\
-			func##_positive_offset)
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN		27

 #define SEEN_FUNC	0x1000 /* might call external helpers */
 #define SEEN_STACK	0x2000 /* uses BPF stack */
-#define SEEN_SKB	0x4000 /* uses sk_buff */
-#define SEEN_TAILCALL	0x8000 /* uses tail calls */
+#define SEEN_TAILCALL	0x4000 /* uses tail calls */

 struct codegen_context {
 	/*
 	 * This is used to track register usage as well
 	 * as calls to external helpers.
 	 * - register usage is tracked with corresponding
-	 *   bits (r3-r10 and r25-r31)
+	 *   bits (r3-r10 and r27-r31)
 	 * - rest of the bits can be used to track other
 	 *   things -- for now, we use bits 16 to 23
 	 *   encoded in SEEN_* macros above

--- a/arch/powerpc/net/bpf_jit_asm64.S
+++ b/arch/powerpc/net/bpf_jit_asm64.S
-/*
- * bpf_jit_asm64.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
- * 		   IBM Corporation
- *
- * Based on bpf_jit_asm.S by Matt Evans
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/ptrace.h>
-#include "bpf_jit64.h"
-
-/*
- * All of these routines are called directly from generated code,
- * with the below register usage:
- * r27		skb pointer (ctx)
- * r25		skb header length
- * r26		skb->data pointer
- * r4		offset
- *
- * Result is passed back in:
- * r8		data read in host endian format (accumulator)
- *
- * r9 is used as a temporary register
- */
-
-#define r_skb	r27
-#define r_hlen	r25
-#define r_data	r26
-#define r_off	r4
-#define r_val	r8
-#define r_tmp	r9
-
-_GLOBAL_TOC(sk_load_word)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_word_neg
-	b	sk_load_word_positive_offset
-
-_GLOBAL_TOC(sk_load_word_positive_offset)
-	/* Are we accessing past headlen? */
-	subi	r_tmp, r_hlen, 4
-	cmpd	r_tmp, r_off
-	blt	bpf_slow_path_word
-	/* Nope, just hitting the header.  cr0 here is eq or gt! */
-	LWZX_BE	r_val, r_data, r_off
-	blr	/* Return success, cr0 != LT */
-
-_GLOBAL_TOC(sk_load_half)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_half_neg
-	b	sk_load_half_positive_offset
-
-_GLOBAL_TOC(sk_load_half_positive_offset)
-	subi	r_tmp, r_hlen, 2
-	cmpd	r_tmp, r_off
-	blt	bpf_slow_path_half
-	LHZX_BE	r_val, r_data, r_off
-	blr
-
-_GLOBAL_TOC(sk_load_byte)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_byte_neg
-	b	sk_load_byte_positive_offset
-
-_GLOBAL_TOC(sk_load_byte_positive_offset)
-	cmpd	r_hlen, r_off
-	ble	bpf_slow_path_byte
-	lbzx	r_val, r_data, r_off
-	blr
-
-/*
- * Call out to skb_copy_bits:
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE)					\
-	mflr	r0;							\
-	std	r0, PPC_LR_STKOFF(r1);					\
-	stdu	r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1);	\
-	mr	r3, r_skb;						\
-	/* r4 = r_off as passed */					\
-	addi	r5, r1, STACK_FRAME_MIN_SIZE;				\
-	li	r6, SIZE;						\
-	bl	skb_copy_bits;						\
-	nop;								\
-	/* save r5 */							\
-	addi	r5, r1, STACK_FRAME_MIN_SIZE;				\
-	/* r3 = 0 on success */						\
-	addi	r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS;	\
-	ld	r0, PPC_LR_STKOFF(r1);					\
-	mtlr	r0;							\
-	cmpdi	r3, 0;							\
-	blt	bpf_error;	/* cr0 = LT */
-
-bpf_slow_path_word:
-	bpf_slow_path_common(4)
-	/* Data value is on stack, and cr0 != LT */
-	LWZX_BE	r_val, 0, r5
-	blr
-
-bpf_slow_path_half:
-	bpf_slow_path_common(2)
-	LHZX_BE	r_val, 0, r5
-	blr
-
-bpf_slow_path_byte:
-	bpf_slow_path_common(1)
-	lbzx	r_val, 0, r5
-	blr
-
-/*
- * Call out to bpf_internal_load_pointer_neg_helper
- */
-#define sk_negative_common(SIZE)				\
-	mflr	r0;						\
-	std	r0, PPC_LR_STKOFF(r1);				\
-	stdu	r1, -STACK_FRAME_MIN_SIZE(r1);			\
-	mr	r3, r_skb;					\
-	/* r4 = r_off, as passed */				\
-	li	r5, SIZE;					\
-	bl	bpf_internal_load_pointer_neg_helper;		\
-	nop;							\
-	addi	r1, r1, STACK_FRAME_MIN_SIZE;			\
-	ld	r0, PPC_LR_STKOFF(r1);				\
-	mtlr	r0;						\
-	/* R3 != 0 on success */				\
-	cmpldi	r3, 0;						\
-	beq	bpf_error_slow;	/* cr0 = EQ */
-
-bpf_slow_path_word_neg:
-	lis     r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_word_negative_offset
-
-_GLOBAL_TOC(sk_load_word_negative_offset)
-	sk_negative_common(4)
-	LWZX_BE	r_val, 0, r3
-	blr
-
-bpf_slow_path_half_neg:
-	lis     r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_half_negative_offset
-
-_GLOBAL_TOC(sk_load_half_negative_offset)
-	sk_negative_common(2)
-	LHZX_BE	r_val, 0, r3
-	blr
-
-bpf_slow_path_byte_neg:
-	lis     r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_byte_negative_offset
-
-_GLOBAL_TOC(sk_load_byte_negative_offset)
-	sk_negative_common(1)
-	lbzx	r_val, 0, r3
-	blr
-
-bpf_error_slow:
-	/* fabricate a cr0 = lt */
-	li	r_tmp, -1
-	cmpdi	r_tmp, 0
-bpf_error:
-	/*
-	 * Entered with cr0 = lt
-	 * Generated code will 'blt epilogue', returning 0.
-	 */
-	li	r_val, 0
-	blr
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 *		[	prev sp		] <-------------
 *		[	  ...       	] 		|
 * sp (r1) --->	[    stack pointer	] --------------
- *		[   nv gpr save area	] 8*8
+ *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	BUG();
 }

-static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
-{
-	/*
-	 * Load skb->len and skb->data_len
-	 * r3 points to skb
-	 */
-	PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
-	PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
-	/* header_len = len - data_len */
-	PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
-
-	/* skb->data pointer */
-	PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
-}
-
 static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 		if (bpf_is_seen_register(ctx, i))
 			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

-	/*
-	 * Save additional non-volatile regs if we cache skb
-	 * Also, setup skb data
-	 */
-	if (ctx->seen & SEEN_SKB) {
-		PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-		PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-		bpf_jit_emit_skb_loads(image, ctx);
-	}
-
 	/* Setup frame pointer to point to the bpf stack area */
 	if (bpf_is_seen_register(ctx, BPF_REG_FP))
 		PPC_ADDI(b2p[BPF_REG_FP], 1,
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 		if (bpf_is_seen_register(ctx, i))
 			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

-	/* Restore non-volatile registers used for skb cache */
-	if (ctx->seen & SEEN_SKB) {
-		PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-		PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-	}
-
 	/* Tear down our stack frame */
 	if (bpf_has_stack_frame(ctx)) {
 		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
@@ -753,23 +718,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			ctx->seen |= SEEN_FUNC;
 			func = (u8 *) __bpf_call_base + imm;

-			/* Save skb pointer if we need to re-cache skb data */
-			if ((ctx->seen & SEEN_SKB) &&
-			    bpf_helper_changes_pkt_data(func))
-				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
-
 			bpf_jit_emit_func_call(image, ctx, (u64)func);

 			/* move return value from r3 to BPF_REG_0 */
 			PPC_MR(b2p[BPF_REG_0], 3);
-
-			/* refresh skb cache */
-			if ((ctx->seen & SEEN_SKB) &&
-			    bpf_helper_changes_pkt_data(func)) {
-				/* reload skb pointer to r3 */
-				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
-				bpf_jit_emit_skb_loads(image, ctx);
-			}
 			break;

 		/*
@@ -886,65 +838,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			PPC_BCC(true_cond, addrs[i + 1 + off]);
 			break;

-		/*
-		 * Loads from packet header/data
-		 * Assume 32-bit input value in imm and X (src_reg)
-		 */
-
-		/* Absolute loads */
-		case BPF_LD | BPF_W | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
-			goto common_load_abs;
-		case BPF_LD | BPF_H | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
-			goto common_load_abs;
-		case BPF_LD | BPF_B | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
-common_load_abs:
-			/*
-			 * Load from [imm]
-			 * Load into r4, which can just be passed onto
-			 *  skb load helpers as the second parameter
-			 */
-			PPC_LI32(4, imm);
-			goto common_load;
-
-		/* Indirect loads */
-		case BPF_LD | BPF_W | BPF_IND:
-			func = (u8 *)sk_load_word;
-			goto common_load_ind;
-		case BPF_LD | BPF_H | BPF_IND:
-			func = (u8 *)sk_load_half;
-			goto common_load_ind;
-		case BPF_LD | BPF_B | BPF_IND:
-			func = (u8 *)sk_load_byte;
-common_load_ind:
-			/*
-			 * Load from [src_reg + imm]
-			 * Treat src_reg as a 32-bit value
-			 */
-			PPC_EXTSW(4, src_reg);
-			if (imm) {
-				if (imm >= -32768 && imm < 32768)
-					PPC_ADDI(4, 4, IMM_L(imm));
-				else {
-					PPC_LI32(b2p[TMP_REG_1], imm);
-					PPC_ADD(4, 4, b2p[TMP_REG_1]);
-				}
-			}
-
-common_load:
-			ctx->seen |= SEEN_SKB;
-			ctx->seen |= SEEN_FUNC;
-			bpf_jit_emit_func_call(image, ctx, (u64)func);
-
-			/*
-			 * Helper returns 'lt' condition on error, and an
-			 * appropriate return value in BPF_REG_0
-			 */
-			PPC_BCC(COND_LT, exit_addr);
-			break;
-
 		/*
 		 * Tail call
 		 */

--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -2,4 +2,4 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * BPF Jit compiler for s390, help functions.
- *
- * Copyright IBM Corp. 2012,2015
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include "bpf_jit.h"
-
-/*
- * Calling convention:
- * registers %r7-%r10, %r11,%r13, and %r15 are call saved
- *
- * Input (64 bit):
- *   %r3 (%b2) = offset into skb data
- *   %r6 (%b5) = return address
- *   %r7 (%b6) = skb pointer
- *   %r12      = skb data pointer
- *
- * Output:
- *   %r14= %b0 = return value (read skb value)
- *
- * Work registers: %r2,%r4,%r5,%r14
- *
- * skb_copy_bits takes 4 parameters:
- *   %r2 = skb pointer
- *   %r3 = offset into skb data
- *   %r4 = pointer to temp buffer
- *   %r5 = length to copy
- *   Return value in %r2: 0 = ok
- *
- * bpf_internal_load_pointer_neg_helper takes 3 parameters:
- *   %r2 = skb pointer
- *   %r3 = offset into data
- *   %r4 = length to copy
- *   Return value in %r2: Pointer to data
- */
-
-#define SKF_MAX_NEG_OFF	-0x200000	/* SKF_LL_OFF from filter.h */
-
-/*
- * Load SIZE bytes from SKB
- */
-#define sk_load_common(NAME, SIZE, LOAD)				\
-ENTRY(sk_load_##NAME);							\
-	ltgr	%r3,%r3;		/* Is offset negative? */	\
-	jl	sk_load_##NAME##_slow_neg;				\
-ENTRY(sk_load_##NAME##_pos);						\
-	aghi	%r3,SIZE;		/* Offset + SIZE */		\
-	clg	%r3,STK_OFF_HLEN(%r15);	/* Offset + SIZE > hlen? */	\
-	jh	sk_load_##NAME##_slow;					\
-	LOAD	%r14,-SIZE(%r3,%r12);	/* Get data from skb */		\
-	b	OFF_OK(%r6);		/* Return */			\
-									\
-sk_load_##NAME##_slow:;							\
-	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
-	aghi	%r3,-SIZE;		/* Arg2 = offset */		\
-	la	%r4,STK_OFF_TMP(%r15);	/* Arg3 = temp bufffer */	\
-	lghi	%r5,SIZE;		/* Arg4 = size */		\
-	brasl	%r14,skb_copy_bits;	/* Get data from skb */		\
-	LOAD	%r14,STK_OFF_TMP(%r15);	/* Load from temp bufffer */	\
-	ltgr	%r2,%r2;		/* Set cc to (%r2 != 0) */	\
-	br	%r6;			/* Return */
-
-sk_load_common(word, 4, llgf)	/* r14 = *(u32 *) (skb->data+offset) */
-sk_load_common(half, 2, llgh)	/* r14 = *(u16 *) (skb->data+offset) */
-
-/*
- * Load 1 byte from SKB (optimized version)
- */
-	/* r14 = *(u8 *) (skb->data+offset) */
-ENTRY(sk_load_byte)
-	ltgr	%r3,%r3			# Is offset negative?
-	jl	sk_load_byte_slow_neg
-ENTRY(sk_load_byte_pos)
-	clg	%r3,STK_OFF_HLEN(%r15)	# Offset >= hlen?
-	jnl	sk_load_byte_slow
-	llgc	%r14,0(%r3,%r12)	# Get byte from skb
-	b	OFF_OK(%r6)		# Return OK
-
-sk_load_byte_slow:
-	lgr	%r2,%r7			# Arg1 = skb pointer
-					# Arg2 = offset
-	la	%r4,STK_OFF_TMP(%r15)	# Arg3 = pointer to temp buffer
-	lghi	%r5,1			# Arg4 = size (1 byte)
-	brasl	%r14,skb_copy_bits	# Get data from skb
-	llgc	%r14,STK_OFF_TMP(%r15)	# Load result from temp buffer
-	ltgr	%r2,%r2			# Set cc to (%r2 != 0)
-	br	%r6			# Return cc
-
-#define sk_negative_common(NAME, SIZE, LOAD)				\
-sk_load_##NAME##_slow_neg:;						\
-	cgfi	%r3,SKF_MAX_NEG_OFF;					\
-	jl	bpf_error;						\
-	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
-					/* Arg2 = offset */		\
-	lghi	%r4,SIZE;		/* Arg3 = size */		\
-	brasl	%r14,bpf_internal_load_pointer_neg_helper;		\
-	ltgr	%r2,%r2;						\
-	jz	bpf_error;						\
-	LOAD	%r14,0(%r2);		/* Get data from pointer */	\
-	xr	%r3,%r3;		/* Set cc to zero */		\
-	br	%r6;			/* Return cc */
-
-sk_negative_common(word, 4, llgf)
-sk_negative_common(half, 2, llgh)
-sk_negative_common(byte, 1, llgc)
-
-bpf_error:
-# force a return 0 from jit handler
-	ltgr	%r15,%r15	# Set condition code
-	br	%r6
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -16,9 +16,6 @@
 #include <linux/filter.h>
 #include <linux/types.h>

-extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-
 #endif /* __ASSEMBLY__ */

 /*
@@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
 *	      |		      |     |
 *	      |   BPF stack   |     |
 *	      |		      |     |
- *	      +---------------+     |
- *	      | 8 byte skbp   |     |
- * R15+176 -> +---------------+     |
- *	      | 8 byte hlen   |     |
- * R15+168 -> +---------------+     |
- *	      | 4 byte align  |     |
- *	      +---------------+     |
- *	      | 4 byte temp   |     |
- *	      | for bpf_jit.S |     |
 * R15+160 -> +---------------+     |
 *	      | new backchain |     |
 * R15+152 -> +---------------+     |
@@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
 * The stack size used by the BPF program ("BPF stack" above) is passed
 * via "aux->stack_depth".
 */
-#define STK_SPACE_ADD (8 + 8 + 4 + 4 + 160)
+#define STK_SPACE_ADD	(160)
 #define STK_160_UNUSED	(160 - 12 * 8)
 #define STK_OFF		(STK_SPACE_ADD - STK_160_UNUSED)
-#define STK_OFF_TMP	160	/* Offset of tmp buffer on stack */
-#define STK_OFF_HLEN	168	/* Offset of SKB header length on stack */
-#define STK_OFF_SKBP	176	/* Offset of SKB pointer on stack */

 #define STK_OFF_R6	(160 - 11 * 8)	/* Offset of r6 on stack */
 #define STK_OFF_TCCNT	(160 - 12 * 8)	/* Offset of tail_call_cnt on stack */

-/* Offset to skip condition code check */
-#define OFF_OK		4
-
 #endif /* __ARCH_S390_NET_BPF_JIT_H */
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -47,23 +47,21 @@ struct bpf_jit {

 #define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */

-#define SEEN_SKB	1	/* skb access */
-#define SEEN_MEM	2	/* use mem[] for temporary storage */
-#define SEEN_RET0	4	/* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL	8	/* code uses literals */
-#define SEEN_FUNC	16	/* calls C functions */
-#define SEEN_TAIL_CALL	32	/* code uses tail calls */
-#define SEEN_REG_AX	64	/* code uses constant blinding */
-#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+#define SEEN_MEM	(1 << 0)	/* use mem[] for temporary storage */
+#define SEEN_RET0	(1 << 1)	/* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL	(1 << 2)	/* code uses literals */
+#define SEEN_FUNC	(1 << 3)	/* calls C functions */
+#define SEEN_TAIL_CALL	(1 << 4)	/* code uses tail calls */
+#define SEEN_REG_AX	(1 << 5)	/* code uses constant blinding */
+#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)

 /*
 * s390 registers
 */
 #define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
 #define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
-#define REG_SKB_DATA	(MAX_BPF_JIT_REG + 2)	/* SKB data register */
-#define REG_L		(MAX_BPF_JIT_REG + 3)	/* Literal pool register */
-#define REG_15		(MAX_BPF_JIT_REG + 4)	/* Register 15 */
+#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
+#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
 #define REG_0		REG_W0			/* Register 0 */
 #define REG_1		REG_W1			/* Register 1 */
 #define REG_2		BPF_REG_1		/* Register 2 */
@@ -88,10 +86,8 @@ static const int reg2hex[] = {
 	[BPF_REG_9]	= 10,
 	/* BPF stack pointer */
 	[BPF_REG_FP]	= 13,
-	/* Register for blinding (shared with REG_SKB_DATA) */
+	/* Register for blinding */
 	[BPF_REG_AX]	= 12,
-	/* SKB data pointer */
-	[REG_SKB_DATA]	= 12,
 	/* Work registers for s390x backend */
 	[REG_W0]	= 0,
 	[REG_W1]	= 1,
@@ -384,27 +380,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
 	} while (re <= 15);
 }

-/*
- * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
- * we store the SKB header length on the stack and the SKB data
- * pointer in REG_SKB_DATA if BPF_REG_AX is not used.
- */
-static void emit_load_skb_data_hlen(struct bpf_jit *jit)
-{
-	/* Header length: llgf %w1,<len>(%b1) */
-	EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
-		      offsetof(struct sk_buff, len));
-	/* s %w1,<data_len>(%b1) */
-	EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
-		   offsetof(struct sk_buff, data_len));
-	/* stg %w1,ST_OFF_HLEN(%r0,%r15) */
-	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
-	if (!(jit->seen & SEEN_REG_AX))
-		/* lg %skb_data,data_off(%b1) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
-			      BPF_REG_1, offsetof(struct sk_buff, data));
-}
-
 /*
 * Emit function prologue
 *
@@ -445,12 +420,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
 	}
-	if (jit->seen & SEEN_SKB) {
-		emit_load_skb_data_hlen(jit);
-		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
-		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
-			      STK_OFF_SKBP);
-	}
 }

 /*
@@ -483,12 +452,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
 	int jmp_off, last, insn_count = 1;
-	unsigned int func_addr, mask;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
 	u32 *addrs = jit->addrs;
 	s32 imm = insn->imm;
 	s16 off = insn->off;
+	unsigned int mask;

 	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
 		jit->seen |= SEEN_REG_AX;
@@ -970,13 +939,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		EMIT2(0x0d00, REG_14, REG_W1);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
-		if ((jit->seen & SEEN_SKB) &&
-		    bpf_helper_changes_pkt_data((void *)func)) {
-			/* lg %b1,ST_OFF_SKBP(%r15) */
-			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
-				      REG_15, STK_OFF_SKBP);
-			emit_load_skb_data_hlen(jit);
-		}
 		break;
 	}
 	case BPF_JMP | BPF_TAIL_CALL:
@@ -1176,73 +1138,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
 		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
 		break;
-	/*
-	 * BPF_LD
-	 */
-	case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
-	case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
-		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-			func_addr = __pa(sk_load_byte_pos);
-		else
-			func_addr = __pa(sk_load_byte);
-		goto call_fn;
-	case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
-	case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
-		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-			func_addr = __pa(sk_load_half_pos);
-		else
-			func_addr = __pa(sk_load_half);
-		goto call_fn;
-	case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
-	case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
-		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-			func_addr = __pa(sk_load_word_pos);
-		else
-			func_addr = __pa(sk_load_word);
-		goto call_fn;
-call_fn:
-		jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
-		REG_SET_SEEN(REG_14); /* Return address of possible func call */
-
-		/*
-		 * Implicit input:
-		 *  BPF_REG_6	 (R7) : skb pointer
-		 *  REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
-		 *
-		 * Calculated input:
-		 *  BPF_REG_2	 (R3) : offset of byte(s) to fetch in skb
-		 *  BPF_REG_5	 (R6) : return address
-		 *
-		 * Output:
-		 *  BPF_REG_0	 (R14): data read from skb
-		 *
-		 * Scratch registers (BPF_REG_1-5)
-		 */
-
-		/* Call function: llilf %w1,func_addr  */
-		EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
-
-		/* Offset: lgfi %b2,imm */
-		EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
-		if (BPF_MODE(insn->code) == BPF_IND)
-			/* agfr %b2,%src (%src is s32 here) */
-			EMIT4(0xb9180000, BPF_REG_2, src_reg);
-
-		/* Reload REG_SKB_DATA if BPF_REG_AX is used */
-		if (jit->seen & SEEN_REG_AX)
-			/* lg %skb_data,data_off(%b6) */
-			EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
-				      BPF_REG_6, offsetof(struct sk_buff, data));
-		/* basr %b5,%w1 (%b5 is call saved) */
-		EMIT2(0x0d00, BPF_REG_5, REG_W1);
-
-		/*
-		 * Note: For fast access we jump directly after the
-		 * jnz instruction from bpf_jit.S
-		 */
-		/* jnz <ret0> */
-		EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
-		break;
 	default: /* too complex, give up */
 		pr_err("Unknown opcode %02x\n", insn->code);
 		return -1;

--- a/arch/sparc/net/Makefile
+++ b/arch/sparc/net/Makefile
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o
+ifeq ($(BITS),32)
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o
+endif
--- a/arch/sparc/net/bpf_jit_64.h
+++ b/arch/sparc/net/bpf_jit_64.h
@@ -33,35 +33,6 @@
 #define I5		0x1d
 #define FP		0x1e
 #define I7		0x1f
-
-#define r_SKB		L0
-#define r_HEADLEN	L4
-#define r_SKB_DATA	L5
-#define r_TMP		G1
-#define r_TMP2		G3
-
-/* assembly code in arch/sparc/net/bpf_jit_asm_64.S */
-extern u32 bpf_jit_load_word[];
-extern u32 bpf_jit_load_half[];
-extern u32 bpf_jit_load_byte[];
-extern u32 bpf_jit_load_byte_msh[];
-extern u32 bpf_jit_load_word_positive_offset[];
-extern u32 bpf_jit_load_half_positive_offset[];
-extern u32 bpf_jit_load_byte_positive_offset[];
-extern u32 bpf_jit_load_byte_msh_positive_offset[];
-extern u32 bpf_jit_load_word_negative_offset[];
-extern u32 bpf_jit_load_half_negative_offset[];
-extern u32 bpf_jit_load_byte_negative_offset[];
-extern u32 bpf_jit_load_byte_msh_negative_offset[];
-
-#else
-#define r_RESULT	%o0
-#define r_SKB		%o0
-#define r_OFF		%o1
-#define r_HEADLEN	%l4
-#define r_SKB_DATA	%l5
-#define r_TMP		%g1
-#define r_TMP2		%g3
 #endif

 #endif /* _BPF_JIT_H */
--- a/arch/sparc/net/bpf_jit_asm_64.S
+++ b/arch/sparc/net/bpf_jit_asm_64.S
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <asm/ptrace.h>
-
-#include "bpf_jit_64.h"
-
-#define SAVE_SZ		176
-#define SCRATCH_OFF	STACK_BIAS + 128
-#define BE_PTR(label)	be,pn %xcc, label
-#define SIGN_EXTEND(reg)	sra reg, 0, reg
-
-#define SKF_MAX_NEG_OFF	(-0x200000) /* SKF_LL_OFF from filter.h */
-
-	.text
-	.globl	bpf_jit_load_word
-bpf_jit_load_word:
-	cmp	r_OFF, 0
-	bl	bpf_slow_path_word_neg
-	 nop
-	.globl	bpf_jit_load_word_positive_offset
-bpf_jit_load_word_positive_offset:
-	sub	r_HEADLEN, r_OFF, r_TMP
-	cmp	r_TMP, 3
-	ble	bpf_slow_path_word
-	 add	r_SKB_DATA, r_OFF, r_TMP
-	andcc	r_TMP, 3, %g0
-	bne	load_word_unaligned
-	 nop
-	retl
-	 ld	[r_TMP], r_RESULT
-load_word_unaligned:
-	ldub	[r_TMP + 0x0], r_OFF
-	ldub	[r_TMP + 0x1], r_TMP2
-	sll	r_OFF, 8, r_OFF
-	or	r_OFF, r_TMP2, r_OFF
-	ldub	[r_TMP + 0x2], r_TMP2
-	sll	r_OFF, 8, r_OFF
-	or	r_OFF, r_TMP2, r_OFF
-	ldub	[r_TMP + 0x3], r_TMP2
-	sll	r_OFF, 8, r_OFF
-	retl
-	 or	r_OFF, r_TMP2, r_RESULT
-
-	.globl	bpf_jit_load_half
-bpf_jit_load_half:
-	cmp	r_OFF, 0
-	bl	bpf_slow_path_half_neg
-	 nop
-	.globl	bpf_jit_load_half_positive_offset
-bpf_jit_load_half_positive_offset:
-	sub	r_HEADLEN, r_OFF, r_TMP
-	cmp	r_TMP, 1
-	ble	bpf_slow_path_half
-	 add	r_SKB_DATA, r_OFF, r_TMP
-	andcc	r_TMP, 1, %g0
-	bne	load_half_unaligned
-	 nop
-	retl
-	 lduh	[r_TMP], r_RESULT
-load_half_unaligned:
-	ldub	[r_TMP + 0x0], r_OFF
-	ldub	[r_TMP + 0x1], r_TMP2
-	sll	r_OFF, 8, r_OFF
-	retl
-	 or	r_OFF, r_TMP2, r_RESULT
-
-	.globl	bpf_jit_load_byte
-bpf_jit_load_byte:
-	cmp	r_OFF, 0
-	bl	bpf_slow_path_byte_neg
-	 nop
-	.globl	bpf_jit_load_byte_positive_offset
-bpf_jit_load_byte_positive_offset:
-	cmp	r_OFF, r_HEADLEN
-	bge	bpf_slow_path_byte
-	 nop
-	retl
-	 ldub	[r_SKB_DATA + r_OFF], r_RESULT
-
-#define bpf_slow_path_common(LEN)	\
-	save	%sp, -SAVE_SZ, %sp;	\
-	mov	%i0, %o0;		\
-	mov	%i1, %o1;		\
-	add	%fp, SCRATCH_OFF, %o2;	\
-	call	skb_copy_bits;		\
-	 mov	(LEN), %o3;		\
-	cmp	%o0, 0;			\
-	restore;
-
-bpf_slow_path_word:
-	bpf_slow_path_common(4)
-	bl	bpf_error
-	 ld	[%sp + SCRATCH_OFF], r_RESULT
-	retl
-	 nop
-bpf_slow_path_half:
-	bpf_slow_path_common(2)
-	bl	bpf_error
-	 lduh	[%sp + SCRATCH_OFF], r_RESULT
-	retl
-	 nop
-bpf_slow_path_byte:
-	bpf_slow_path_common(1)
-	bl	bpf_error
-	 ldub	[%sp + SCRATCH_OFF], r_RESULT
-	retl
-	 nop
-
-#define bpf_negative_common(LEN)			\
-	save	%sp, -SAVE_SZ, %sp;			\
-	mov	%i0, %o0;				\
-	mov	%i1, %o1;				\
-	SIGN_EXTEND(%o1);				\
-	call	bpf_internal_load_pointer_neg_helper;	\
-	 mov	(LEN), %o2;				\
-	mov	%o0, r_TMP;				\
-	cmp	%o0, 0;					\
-	BE_PTR(bpf_error);				\
-	 restore;
-
-bpf_slow_path_word_neg:
-	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
-	cmp	r_OFF, r_TMP
-	bl	bpf_error
-	 nop
-	.globl	bpf_jit_load_word_negative_offset
-bpf_jit_load_word_negative_offset:
-	bpf_negative_common(4)
-	andcc	r_TMP, 3, %g0
-	bne	load_word_unaligned
-	 nop
-	retl
-	 ld	[r_TMP], r_RESULT
-
-bpf_slow_path_half_neg:
-	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
-	cmp	r_OFF, r_TMP
-	bl	bpf_error
-	 nop
-	.globl	bpf_jit_load_half_negative_offset
-bpf_jit_load_half_negative_offset:
-	bpf_negative_common(2)
-	andcc	r_TMP, 1, %g0
-	bne	load_half_unaligned
-	 nop
-	retl
-	 lduh	[r_TMP], r_RESULT
-
-bpf_slow_path_byte_neg:
-	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
-	cmp	r_OFF, r_TMP
-	bl	bpf_error
-	 nop
-	.globl	bpf_jit_load_byte_negative_offset
-bpf_jit_load_byte_negative_offset:
-	bpf_negative_common(1)
-	retl
-	 ldub	[r_TMP], r_RESULT
-
-bpf_error:
-	/* Make the JIT program itself return zero. */
-	ret
-	restore	%g0, %g0, %o0
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_)
 	}
 }

-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG    2 /* ebx is used */
-#define SEEN_MEM     4 /* use mem[] for temporary storage */
-
 #define S13(X)		((X) & 0x1fff)
 #define S5(X)		((X) & 0x1f)
 #define IMMED		0x00002000
@@ -198,7 +194,6 @@ struct jit_ctx {
 	bool 			tmp_1_used;
 	bool 			tmp_2_used;
 	bool 			tmp_3_used;
-	bool			saw_ld_abs_ind;
 	bool			saw_frame_pointer;
 	bool			saw_call;
 	bool			saw_tail_call;
@@ -207,9 +202,7 @@ struct jit_ctx {

 #define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
-#define SKB_HLEN_REG	(MAX_BPF_JIT_REG + 2)
-#define SKB_DATA_REG	(MAX_BPF_JIT_REG + 3)
-#define TMP_REG_3	(MAX_BPF_JIT_REG + 4)
+#define TMP_REG_3	(MAX_BPF_JIT_REG + 2)

 /* Map BPF registers to SPARC registers */
 static const int bpf2sparc[] = {
@@ -238,9 +231,6 @@ static const int bpf2sparc[] = {
 	[TMP_REG_1] = G1,
 	[TMP_REG_2] = G2,
 	[TMP_REG_3] = G3,
-
-	[SKB_HLEN_REG] = L4,
-	[SKB_DATA_REG] = L5,
 };

 static void emit(const u32 insn, struct jit_ctx *ctx)
@@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
 	return 0;
 }

-static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb)
-{
-	const u8 r_headlen = bpf2sparc[SKB_HLEN_REG];
-	const u8 r_data = bpf2sparc[SKB_DATA_REG];
-	const u8 r_tmp = bpf2sparc[TMP_REG_1];
-	unsigned int off;
-
-	off = offsetof(struct sk_buff, len);
-	emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx);
-
-	off = offsetof(struct sk_buff, data_len);
-	emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx);
-
-	emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx);
-
-	off = offsetof(struct sk_buff, data);
-	emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx);
-}
-
 /* Just skip the save instruction and the ctx register move.  */
 #define BPF_TAILCALL_PROLOGUE_SKIP	16
 #define BPF_TAILCALL_CNT_SP_OFF		(STACK_BIAS + 128)
@@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx)

 	emit_reg_move(I0, O0, ctx);
 	/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
-
-	if (ctx->saw_ld_abs_ind)
-		load_skb_regs(ctx, bpf2sparc[BPF_REG_1]);
 }

 static void build_epilogue(struct jit_ctx *ctx)
@@ -1225,16 +1193,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		u8 *func = ((u8 *)__bpf_call_base) + imm;

 		ctx->saw_call = true;
-		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
-			emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);

 		emit_call((u32 *)func, ctx);
 		emit_nop(ctx);

 		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
-
-		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
-			load_skb_regs(ctx, L7);
 		break;
 	}

@@ -1412,43 +1375,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		emit_nop(ctx);
 		break;
 	}
-#define CHOOSE_LOAD_FUNC(K, func) \
-		((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-	case BPF_LD | BPF_ABS | BPF_W:
-		func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word);
-		goto common_load;
-	case BPF_LD | BPF_ABS | BPF_H:
-		func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half);
-		goto common_load;
-	case BPF_LD | BPF_ABS | BPF_B:
-		func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte);
-		goto common_load;
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-	case BPF_LD | BPF_IND | BPF_W:
-		func = bpf_jit_load_word;
-		goto common_load;
-	case BPF_LD | BPF_IND | BPF_H:
-		func = bpf_jit_load_half;
-		goto common_load;
-
-	case BPF_LD | BPF_IND | BPF_B:
-		func = bpf_jit_load_byte;
-	common_load:
-		ctx->saw_ld_abs_ind = true;
-
-		emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx);
-		emit_loadimm(imm, O1, ctx);
-
-		if (BPF_MODE(code) == BPF_IND)
-			emit_alu(ADD, src, O1, ctx);
-
-		emit_call(func, ctx);
-		emit_alu_K(SRA, O1, 0, ctx);
-
-		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
-		break;

 	default:
 		pr_err_once("unknown opcode %02x\n", code);
@@ -1583,12 +1509,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		build_epilogue(&ctx);

 		if (bpf_jit_enable > 1)
-			pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass,
+			pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
 				image_size - (ctx.idx * 4),
 				ctx.tmp_1_used ? '1' : ' ',
 				ctx.tmp_2_used ? '2' : ' ',
 				ctx.tmp_3_used ? '3' : ' ',
-				ctx.saw_ld_abs_ind ? 'L' : ' ',
 				ctx.saw_frame_pointer ? 'F' : ' ',
 				ctx.saw_call ? 'C' : ' ',
 				ctx.saw_tail_call ? 'T' : ' ');

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -138,7 +138,7 @@ config X86
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select HAVE_EBPF_JIT			if X86_64
+	select HAVE_EBPF_JIT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EXIT_THREAD
 	select HAVE_FENTRY			if X86_64 || DYNAMIC_FTRACE

--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -291,16 +291,20 @@ do {									\
 *    lfence
 *    jmp spec_trap
 *  do_rop:
- *    mov %rax,(%rsp)
+ *    mov %rax,(%rsp) for x86_64
+ *    mov %edx,(%esp) for x86_32
 *    retq
 *
 * Without retpolines configured:
 *
- *    jmp *%rax
+ *    jmp *%rax for x86_64
+ *    jmp *%edx for x86_32
 */
 #ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_64
 # define RETPOLINE_RAX_BPF_JIT_SIZE	17
 # define RETPOLINE_RAX_BPF_JIT()				\
+do {								\
 	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
 	/* spec_trap: */					\
 	EMIT2(0xF3, 0x90);       /* pause */			\
@@ -308,11 +312,31 @@ do {									\
 	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
 	/* do_rop: */						\
 	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
-	EMIT1(0xC3);             /* retq */
+	EMIT1(0xC3);             /* retq */			\
+} while (0)
 #else
+# define RETPOLINE_EDX_BPF_JIT()				\
+do {								\
+	EMIT1_off32(0xE8, 7);	 /* call do_rop */		\
+	/* spec_trap: */					\
+	EMIT2(0xF3, 0x90);       /* pause */			\
+	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
+	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
+	/* do_rop: */						\
+	EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */		\
+	EMIT1(0xC3);             /* ret */			\
+} while (0)
+#endif
+#else /* !CONFIG_RETPOLINE */
+
+#ifdef CONFIG_X86_64
 # define RETPOLINE_RAX_BPF_JIT_SIZE	2
 # define RETPOLINE_RAX_BPF_JIT()				\
 	EMIT2(0xFF, 0xE0);	 /* jmp *%rax */
+#else
+# define RETPOLINE_EDX_BPF_JIT()				\
+	EMIT2(0xFF, 0xE2) /* jmp *%edx */
+#endif
 #endif

 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
 #
 # Arch-specific network modules
 #
-OBJECT_FILES_NON_STANDARD_bpf_jit.o += y

-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+ifeq ($(CONFIG_X86_32),y)
+        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
+else
+        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+endif
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
-/* bpf_jit.S : BPF JIT helper functions
- *
- * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-/*
- * Calling convention :
- * rbx : skb pointer (callee saved)
- * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r10 : copy of skb->data
- * r9d : hlen = skb->len - skb->data_len
- */
-#define SKBDATA	%r10
-#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
-
-#define FUNC(name) \
-	.globl name; \
-	.type name, @function; \
-	name:
-
-FUNC(sk_load_word)
-	test	%esi,%esi
-	js	bpf_slow_path_word_neg
-
-FUNC(sk_load_word_positive_offset)
-	mov	%r9d,%eax		# hlen
-	sub	%esi,%eax		# hlen - offset
-	cmp	$3,%eax
-	jle	bpf_slow_path_word
-	mov     (SKBDATA,%rsi),%eax
-	bswap   %eax  			/* ntohl() */
-	ret
-
-FUNC(sk_load_half)
-	test	%esi,%esi
-	js	bpf_slow_path_half_neg
-
-FUNC(sk_load_half_positive_offset)
-	mov	%r9d,%eax
-	sub	%esi,%eax		#	hlen - offset
-	cmp	$1,%eax
-	jle	bpf_slow_path_half
-	movzwl	(SKBDATA,%rsi),%eax
-	rol	$8,%ax			# ntohs()
-	ret
-
-FUNC(sk_load_byte)
-	test	%esi,%esi
-	js	bpf_slow_path_byte_neg
-
-FUNC(sk_load_byte_positive_offset)
-	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
-	jle	bpf_slow_path_byte
-	movzbl	(SKBDATA,%rsi),%eax
-	ret
-
-/* rsi contains offset and can be scratched */
-#define bpf_slow_path_common(LEN)		\
-	lea	32(%rbp), %rdx;\
-	FRAME_BEGIN;				\
-	mov	%rbx, %rdi; /* arg1 == skb */	\
-	push	%r9;				\
-	push	SKBDATA;			\
-/* rsi already has offset */			\
-	mov	$LEN,%ecx;	/* len */	\
-	call	skb_copy_bits;			\
-	test    %eax,%eax;			\
-	pop	SKBDATA;			\
-	pop	%r9;				\
-	FRAME_END
-
-
-bpf_slow_path_word:
-	bpf_slow_path_common(4)
-	js	bpf_error
-	mov	32(%rbp),%eax
-	bswap	%eax
-	ret
-
-bpf_slow_path_half:
-	bpf_slow_path_common(2)
-	js	bpf_error
-	mov	32(%rbp),%ax
-	rol	$8,%ax
-	movzwl	%ax,%eax
-	ret
-
-bpf_slow_path_byte:
-	bpf_slow_path_common(1)
-	js	bpf_error
-	movzbl	32(%rbp),%eax
-	ret
-
-#define sk_negative_common(SIZE)				\
-	FRAME_BEGIN;						\
-	mov	%rbx, %rdi; /* arg1 == skb */			\
-	push	%r9;						\
-	push	SKBDATA;					\
-/* rsi already has offset */					\
-	mov	$SIZE,%edx;	/* size */			\
-	call	bpf_internal_load_pointer_neg_helper;		\
-	test	%rax,%rax;					\
-	pop	SKBDATA;					\
-	pop	%r9;						\
-	FRAME_END;						\
-	jz	bpf_error
-
-bpf_slow_path_word_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
-	jl	bpf_error	/* offset lower -> error  */
-
-FUNC(sk_load_word_negative_offset)
-	sk_negative_common(4)
-	mov	(%rax), %eax
-	bswap	%eax
-	ret
-
-bpf_slow_path_half_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi
-	jl	bpf_error
-
-FUNC(sk_load_half_negative_offset)
-	sk_negative_common(2)
-	mov	(%rax),%ax
-	rol	$8,%ax
-	movzwl	%ax,%eax
-	ret
-
-bpf_slow_path_byte_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi
-	jl	bpf_error
-
-FUNC(sk_load_byte_negative_offset)
-	sk_negative_common(1)
-	movzbl	(%rax), %eax
-	ret
-
-bpf_error:
-# force a return 0 from jit handler
-	xor	%eax,%eax
-	mov	(%rbp),%rbx
-	mov	8(%rbp),%r13
-	mov	16(%rbp),%r14
-	mov	24(%rbp),%r15
-	add	$40, %rbp
-	leaveq
-	ret
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -102,6 +102,15 @@ nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
 	return nfp_bpf_cmsg_alloc(bpf, size);
 }

+static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
+{
+	struct cmsg_hdr *hdr;
+
+	hdr = (struct cmsg_hdr *)skb->data;
+
+	return hdr->type;
+}
+
 static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
 {
 	struct cmsg_hdr *hdr;
@@ -431,6 +440,11 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
 		goto err_free;
 	}

+	if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
+		nfp_bpf_event_output(bpf, skb);
+		return;
+	}
+
 	nfp_ctrl_lock(bpf->app->ctrl);

 	tag = nfp_bpf_cmsg_get_tag(skb);

--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -37,6 +37,14 @@
 #include <linux/bitops.h>
 #include <linux/types.h>

+/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking
+ * our FW ABI.  In that case we will do translation in the driver.
+ */
+#define NFP_BPF_SCALAR_VALUE		1
+#define NFP_BPF_MAP_VALUE		4
+#define NFP_BPF_STACK			6
+#define NFP_BPF_PACKET_DATA		8
+
 enum bpf_cap_tlv_type {
 	NFP_BPF_CAP_TYPE_FUNC		= 1,
 	NFP_BPF_CAP_TYPE_ADJUST_HEAD	= 2,
@@ -81,6 +89,7 @@ enum nfp_bpf_cmsg_type {
 	CMSG_TYPE_MAP_DELETE	= 5,
 	CMSG_TYPE_MAP_GETNEXT	= 6,
 	CMSG_TYPE_MAP_GETFIRST	= 7,
+	CMSG_TYPE_BPF_EVENT	= 8,
 	__CMSG_TYPE_MAP_MAX,
 };

@@ -155,4 +164,13 @@ struct cmsg_reply_map_op {
 	__be32 resv;
 	struct cmsg_key_value_pair elem[0];
 };
+
+struct cmsg_bpf_event {
+	struct cmsg_hdr hdr;
+	__be32 cpu_id;
+	__be64 map_ptr;
+	__be32 data_size;
+	__be32 pkt_size;
+	u8 data[0];
+};
 #endif
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -1395,15 +1395,9 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 static int
 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	struct bpf_offloaded_map *offmap;
-	struct nfp_bpf_map *nfp_map;
 	bool load_lm_ptr;
 	u32 ret_tgt;
 	s64 lm_off;
-	swreg tid;
-
-	offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
-	nfp_map = offmap->dev_priv;

 	/* We only have to reload LM0 if the key is not at start of stack */
 	lm_off = nfp_prog->stack_depth;
@@ -1416,17 +1410,12 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	if (meta->func_id == BPF_FUNC_map_update_elem)
 		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);

-	/* Load map ID into a register, it should actually fit as an immediate
-	 * but in case it doesn't deal with it here, not in the delay slots.
-	 */
-	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
-
 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
 		     2, RELO_BR_HELPER);
 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;

 	/* Load map ID into A0 */
-	wrp_mov(nfp_prog, reg_a(0), tid);
+	wrp_mov(nfp_prog, reg_a(0), reg_a(2));

 	/* Load the return address into B0 */
 	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
@@ -1456,6 +1445,31 @@ nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }

+static int
+nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	swreg ptr_type;
+	u32 ret_tgt;
+
+	ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
+
+	ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
+
+	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
+		     2, RELO_BR_HELPER);
+
+	/* Load ptr type into A1 */
+	wrp_mov(nfp_prog, reg_a(1), ptr_type);
+
+	/* Load the return address into B0 */
+	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
+		return -EINVAL;
+
+	return 0;
+}
+
 /* --- Callbacks --- */
 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
@@ -2411,6 +2425,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 		return map_call_stack_common(nfp_prog, meta);
 	case BPF_FUNC_get_prandom_u32:
 		return nfp_get_prandom_u32(nfp_prog, meta);
+	case BPF_FUNC_perf_event_output:
+		return nfp_perf_event_output(nfp_prog, meta);
 	default:
 		WARN_ONCE(1, "verifier allowed unsupported function\n");
 		return -EOPNOTSUPP;
@@ -3227,6 +3243,33 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 	return 0;
 }

+static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta1, *meta2;
+	struct nfp_bpf_map *nfp_map;
+	struct bpf_map *map;
+
+	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+		if (meta1->skip || meta2->skip)
+			continue;
+
+		if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
+		    meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
+			continue;
+
+		map = (void *)(unsigned long)((u32)meta1->insn.imm |
+					      (u64)meta2->insn.imm << 32);
+		if (bpf_map_offload_neutral(map))
+			continue;
+		nfp_map = map_to_offmap(map)->dev_priv;
+
+		meta1->insn.imm = nfp_map->tid;
+		meta2->insn.imm = 0;
+	}
+
+	return 0;
+}
+
 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
 {
 	__le64 *ustore = (__force __le64 *)prog;
@@ -3263,6 +3306,10 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog)
 {
 	int ret;

+	ret = nfp_bpf_replace_map_ptrs(nfp_prog);
+	if (ret)
+		return ret;
+
 	ret = nfp_bpf_optimize(nfp_prog);
 	if (ret)
 		return ret;
@@ -3353,6 +3400,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
 			case BPF_FUNC_map_delete_elem:
 				val = nfp_prog->bpf->helpers.map_delete;
 				break;
+			case BPF_FUNC_perf_event_output:
+				val = nfp_prog->bpf->helpers.perf_event_output;
+				break;
 			default:
 				pr_err("relocation of unknown helper %d\n",
 				       val);

--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -43,6 +43,14 @@
 #include "fw.h"
 #include "main.h"

+const struct rhashtable_params nfp_bpf_maps_neutral_params = {
+	.nelem_hint		= 4,
+	.key_len		= FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
+	.key_offset		= offsetof(struct nfp_bpf_neutral_map, ptr),
+	.head_offset		= offsetof(struct nfp_bpf_neutral_map, l),
+	.automatic_shrinking	= true,
+};
+
 static bool nfp_net_ebpf_capable(struct nfp_net *nn)
 {
 #ifdef __LITTLE_ENDIAN
@@ -290,6 +298,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
 	case BPF_FUNC_map_delete_elem:
 		bpf->helpers.map_delete = readl(&cap->func_addr);
 		break;
+	case BPF_FUNC_perf_event_output:
+		bpf->helpers.perf_event_output = readl(&cap->func_addr);
+		break;
 	}

 	return 0;
@@ -401,17 +412,28 @@ static int nfp_bpf_init(struct nfp_app *app)
 	init_waitqueue_head(&bpf->cmsg_wq);
 	INIT_LIST_HEAD(&bpf->map_list);

-	err = nfp_bpf_parse_capabilities(app);
+	err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
 	if (err)
 		goto err_free_bpf;

+	err = nfp_bpf_parse_capabilities(app);
+	if (err)
+		goto err_free_neutral_maps;
+
 	return 0;

+err_free_neutral_maps:
+	rhashtable_destroy(&bpf->maps_neutral);
 err_free_bpf:
 	kfree(bpf);
 	return err;
 }

+static void nfp_check_rhashtable_empty(void *ptr, void *arg)
+{
+	WARN_ON_ONCE(1);
+}
+
 static void nfp_bpf_clean(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf = app->priv;
@@ -419,6 +441,8 @@ static void nfp_bpf_clean(struct nfp_app *app)
 	WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
 	WARN_ON(!list_empty(&bpf->map_list));
 	WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
+	rhashtable_free_and_destroy(&bpf->maps_neutral,
+				    nfp_check_rhashtable_empty, NULL);
 	kfree(bpf);
 }


--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -39,6 +39,7 @@
 #include <linux/bpf_verifier.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/rhashtable.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
 #include <linux/wait.h>
@@ -114,6 +115,8 @@ enum pkt_vec {
 * @maps_in_use:	number of currently offloaded maps
 * @map_elems_in_use:	number of elements allocated to offloaded maps
 *
+ * @maps_neutral:	hash table of offload-neutral maps (on pointer)
+ *
 * @adjust_head:	adjust head capability
 * @adjust_head.flags:		extra flags for adjust head
 * @adjust_head.off_min:	minimal packet offset within buffer required
@@ -133,6 +136,7 @@ enum pkt_vec {
 * @helpers.map_lookup:		map lookup helper address
 * @helpers.map_update:		map update helper address
 * @helpers.map_delete:		map delete helper address
+ * @helpers.perf_event_output:	output perf event to a ring buffer
 *
 * @pseudo_random:	FW initialized the pseudo-random machinery (CSRs)
 */
@@ -150,6 +154,8 @@ struct nfp_app_bpf {
 	unsigned int maps_in_use;
 	unsigned int map_elems_in_use;

+	struct rhashtable maps_neutral;
+
 	struct nfp_bpf_cap_adjust_head {
 		u32 flags;
 		int off_min;
@@ -171,6 +177,7 @@ struct nfp_app_bpf {
 		u32 map_lookup;
 		u32 map_update;
 		u32 map_delete;
+		u32 perf_event_output;
 	} helpers;

 	bool pseudo_random;
@@ -199,6 +206,14 @@ struct nfp_bpf_map {
 	enum nfp_bpf_map_use use_map[];
 };

+struct nfp_bpf_neutral_map {
+	struct rhash_head l;
+	struct bpf_map *ptr;
+	u32 count;
+};
+
+extern const struct rhashtable_params nfp_bpf_maps_neutral_params;
+
 struct nfp_prog;
 struct nfp_insn_meta;
 typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
@@ -367,6 +382,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
 * @error: error code if something went wrong
 * @stack_depth: max stack depth from the verifier
 * @adjust_head_location: if program has single adjust head call - the insn no.
+ * @map_records_cnt: the number of map pointers recorded for this prog
+ * @map_records: the map record pointers from bpf->maps_neutral
 * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
 */
 struct nfp_prog {
@@ -390,6 +407,9 @@ struct nfp_prog {
 	unsigned int stack_depth;
 	unsigned int adjust_head_location;

+	unsigned int map_records_cnt;
+	struct nfp_bpf_neutral_map **map_records;
+
 	struct list_head insns;
 };

@@ -440,5 +460,7 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
 int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
 			       void *key, void *next_key);

+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
+
 void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
 #endif
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -56,6 +56,126 @@
 #include "../nfp_net_ctrl.h"
 #include "../nfp_net.h"

+/* Record that @nfp_prog uses the offload-neutral @map.
+ *
+ * Records are kept in bpf->maps_neutral and looked up by map pointer.  If
+ * a record for @map already exists its use count is bumped, otherwise a new
+ * record holding a single reference on the map is allocated and inserted.
+ * In both cases the record is appended to nfp_prog->map_records, which must
+ * already be large enough to take it.
+ *
+ * Must be called with RTNL held.  Returns 0 on success or a negative errno,
+ * in which case no reference or record is left behind.
+ */
+static int
+nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
+		   struct bpf_map *map)
+{
+	struct nfp_bpf_neutral_map *record;
+	int err;
+
+	/* Map record paths are entered via ndo, update side is protected. */
+	ASSERT_RTNL();
+
+	/* Reuse path - other offloaded program is already tracking this map. */
+	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+					nfp_bpf_maps_neutral_params);
+	if (record) {
+		nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
+		record->count++;
+		return 0;
+	}
+
+	/* Grab a single ref to the map for our record.  The prog destroy ndo
+	 * happens after free_used_maps().
+	 */
+	map = bpf_map_inc(map, false);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	record = kmalloc(sizeof(*record), GFP_KERNEL);
+	if (!record) {
+		err = -ENOMEM;
+		goto err_map_put;
+	}
+
+	record->ptr = map;
+	record->count = 1;
+
+	err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
+				     nfp_bpf_maps_neutral_params);
+	if (err)
+		goto err_free_rec;
+
+	nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
+
+	return 0;
+
+	/* Unwind in reverse order of acquisition. */
+err_free_rec:
+	kfree(record);
+err_map_put:
+	bpf_map_put(map);
+	return err;
+}
+
+/* Drop every map record held by @nfp_prog.
+ *
+ * A record still used by another program only loses one use.  A record
+ * whose use count reaches zero is removed from bpf->maps_neutral and, once
+ * an RCU grace period has passed, its map reference is put and the record
+ * is freed.  Finally the map_records array itself is freed and reset.
+ *
+ * Must be called with RTNL held.
+ */
+static void
+nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog)
+{
+	bool freed = false;
+	int i;
+
+	ASSERT_RTNL();
+
+	for (i = 0; i < nfp_prog->map_records_cnt; i++) {
+		if (--nfp_prog->map_records[i]->count) {
+			/* Still in use, clear the slot so the free loop
+			 * below leaves the record alone.
+			 */
+			nfp_prog->map_records[i] = NULL;
+			continue;
+		}
+
+		WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral,
+					       &nfp_prog->map_records[i]->l,
+					       nfp_bpf_maps_neutral_params));
+		freed = true;
+	}
+
+	if (freed) {
+		/* nfp_bpf_event_output() looks records up under
+		 * rcu_read_lock(), wait for such readers before releasing
+		 * the maps and records removed above.
+		 */
+		synchronize_rcu();
+
+		for (i = 0; i < nfp_prog->map_records_cnt; i++)
+			if (nfp_prog->map_records[i]) {
+				bpf_map_put(nfp_prog->map_records[i]->ptr);
+				kfree(nfp_prog->map_records[i]);
+			}
+	}
+
+	kfree(nfp_prog->map_records);
+	nfp_prog->map_records = NULL;
+	nfp_prog->map_records_cnt = 0;
+}
+
+/* Record all offload-neutral maps used by @prog on @nfp_prog.
+ *
+ * Allocates nfp_prog->map_records with one slot per offload-neutral map in
+ * prog->aux->used_maps and fills it via nfp_map_ptr_record().  If recording
+ * any map fails, everything recorded so far is forgotten again.
+ *
+ * Returns 0 on success (also when there is nothing to record) or a
+ * negative errno.
+ */
+static int
+nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
+		    struct bpf_prog *prog)
+{
+	int i, cnt, err;
+
+	/* Quickly count the maps we will have to remember */
+	cnt = 0;
+	for (i = 0; i < prog->aux->used_map_cnt; i++)
+		if (bpf_map_offload_neutral(prog->aux->used_maps[i]))
+			cnt++;
+	if (!cnt)
+		return 0;
+
+	nfp_prog->map_records = kmalloc_array(cnt,
+					      sizeof(nfp_prog->map_records[0]),
+					      GFP_KERNEL);
+	if (!nfp_prog->map_records)
+		return -ENOMEM;
+
+	for (i = 0; i < prog->aux->used_map_cnt; i++)
+		if (bpf_map_offload_neutral(prog->aux->used_maps[i])) {
+			err = nfp_map_ptr_record(bpf, nfp_prog,
+						 prog->aux->used_maps[i]);
+			if (err) {
+				nfp_map_ptrs_forget(bpf, nfp_prog);
+				return err;
+			}
+		}
+	/* Both loops apply the same filter, the counts must agree. */
+	WARN_ON(cnt != nfp_prog->map_records_cnt);
+
+	return 0;
+}
+
 static int
 nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
 		 unsigned int cnt)
@@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
 	prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64);
 	prog->aux->offload->jited_image = nfp_prog->prog;

-	return 0;
+	return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
 }

 static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
@@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;

 	kvfree(nfp_prog->prog);
+	nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
 	nfp_prog_free(nfp_prog);

 	return 0;
@@ -320,6 +441,53 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
 	}
 }

+/* Copy callback handed to bpf_event_output() by nfp_bpf_event_output().
+ * Copies @len bytes starting @off bytes into @src to @dst, always returns 0.
+ */
+static unsigned long
+nfp_bpf_perf_event_copy(void *dst, const void *src,
+			unsigned long off, unsigned long len)
+{
+	memcpy(dst, src + off, len);
+	return 0;
+}
+
+/* Deliver a perf event carried by a FW control message to the map named
+ * in the message.
+ *
+ * The message is a struct cmsg_bpf_event header followed by pkt_size bytes
+ * starting at cbe->data and data_size bytes starting at the next 4 byte
+ * aligned offset past them.  Messages which are too short for the sizes
+ * they declare, carry an unexpected ABI version, or name a map pointer not
+ * present in bpf->maps_neutral are dropped.
+ *
+ * @skb is consumed in all cases.  Returns 0 if the event was passed to
+ * bpf_event_output(), -EINVAL if the message was dropped.
+ */
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
+{
+	struct cmsg_bpf_event *cbe = (void *)skb->data;
+	u32 pkt_size, data_size;
+	struct bpf_map *map;
+	u64 msg_len;
+
+	if (skb->len < sizeof(struct cmsg_bpf_event))
+		goto err_drop;
+
+	pkt_size = be32_to_cpu(cbe->pkt_size);
+	data_size = be32_to_cpu(cbe->data_size);
+	map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
+
+	/* Sizes come from the FW, sum them in 64 bits so they cannot wrap.
+	 * The trailing data is read from the 4 byte aligned offset past the
+	 * packet bytes, so when there is any the padding must be covered by
+	 * the message as well.
+	 */
+	msg_len = sizeof(struct cmsg_bpf_event);
+	if (data_size)
+		msg_len += round_up((u64)pkt_size, 4) + data_size;
+	else
+		msg_len += pkt_size;
+	if (skb->len < msg_len)
+		goto err_drop;
+	if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
+		goto err_drop;
+
+	/* Hold the RCU read lock across lookup and delivery so that the
+	 * record, and the map reference it holds, cannot be released by
+	 * nfp_map_ptrs_forget() while the event is being output.
+	 */
+	rcu_read_lock();
+	if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+				    nfp_bpf_maps_neutral_params)) {
+		rcu_read_unlock();
+		pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
+			map);
+		goto err_drop;
+	}
+
+	bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
+			 &cbe->data[round_up(pkt_size, 4)], data_size,
+			 cbe->data, pkt_size, nfp_bpf_perf_event_copy);
+	rcu_read_unlock();
+
+	dev_consume_skb_any(skb);
+	return 0;
+err_drop:
+	dev_kfree_skb_any(skb);
+	return -EINVAL;
+}
+
 static int
 nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
 		 struct netlink_ext_ack *extack)

--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -36,6 +36,8 @@
 #include <linux/kernel.h>
 #include <linux/pkt_cls.h>

+#include "../nfp_app.h"
+#include "../nfp_main.h"
 #include "fw.h"
 #include "main.h"

@@ -149,15 +151,6 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
 		return false;
 	}

-	/* Rest of the checks is only if we re-parse the same insn */
-	if (!meta->func_id)
-		return true;
-
-	if (meta->arg1.map_ptr != reg1->map_ptr) {
-		pr_vlog(env, "%s: called for different map\n", fname);
-		return false;
-	}
-
 	return true;
 }

@@ -216,6 +209,71 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
 		return -EOPNOTSUPP;

+	case BPF_FUNC_perf_event_output:
+		BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE ||
+			     NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE ||
+			     NFP_BPF_STACK != PTR_TO_STACK ||
+			     NFP_BPF_PACKET_DATA != PTR_TO_PACKET);
+
+		if (!bpf->helpers.perf_event_output) {
+			pr_vlog(env, "event_output: not supported by FW\n");
+			return -EOPNOTSUPP;
+		}
+
+		/* Force current CPU to make sure we can report the event
+		 * wherever we get the control message from FW.
+		 */
+		if (reg3->var_off.mask & BPF_F_INDEX_MASK ||
+		    (reg3->var_off.value & BPF_F_INDEX_MASK) !=
+		    BPF_F_CURRENT_CPU) {
+			char tn_buf[48];
+
+			tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off);
+			pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n",
+				tn_buf);
+			return -EOPNOTSUPP;
+		}
+
+		/* Save space in meta, we don't care about arguments other
+		 * than 4th meta, shove it into arg1.
+		 */
+		reg1 = cur_regs(env) + BPF_REG_4;
+
+		if (reg1->type != SCALAR_VALUE /* NULL ptr */ &&
+		    reg1->type != PTR_TO_STACK &&
+		    reg1->type != PTR_TO_MAP_VALUE &&
+		    reg1->type != PTR_TO_PACKET) {
+			pr_vlog(env, "event_output: unsupported ptr type: %d\n",
+				reg1->type);
+			return -EOPNOTSUPP;
+		}
+
+		if (reg1->type == PTR_TO_STACK &&
+		    !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL))
+			return -EOPNOTSUPP;
+
+		/* Warn user that on offload NFP may return success even if map
+		 * is not going to accept the event, since the event output is
+		 * fully async and device won't know the state of the map.
+		 * There is also FW limitation on the event length.
+		 *
+		 * Lost events will not show up on the perf ring, driver
+		 * won't see them at all.  Events may also get reordered.
+		 */
+		dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev,
+			      "bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n");
+		pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n");
+
+		if (!meta->func_id)
+			break;
+
+		if (reg1->type != meta->arg1.type) {
+			pr_vlog(env, "event_output: ptr type changed: %d %d\n",
+				meta->arg1.type, reg1->type);
+			return -EINVAL;
+		}
+		break;
+
 	default:
 		pr_vlog(env, "unsupported function id: %d\n", func_id);
 		return -EOPNOTSUPP;

--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this

--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -110,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
 	return container_of(map, struct bpf_offloaded_map, map);
 }

+/* Returns true if @map is of a type treated as offload neutral; only
+ * BPF_MAP_TYPE_PERF_EVENT_ARRAY maps qualify.
+ */
+static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
+{
+	return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
 static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
 {
 	return map->ops->map_seq_show_elem && map->ops->map_check_btf;
@@ -235,6 +240,8 @@ struct bpf_verifier_ops {
 				struct bpf_insn_access_aux *info);
 	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
 			    const struct bpf_prog *prog);
+	int (*gen_ld_abs)(const struct bpf_insn *orig,
+			  struct bpf_insn *insn_buf);
 	u32 (*convert_ctx_access)(enum bpf_access_type type,
 				  const struct bpf_insn *src,
 				  struct bpf_insn *dst,
@@ -676,6 +683,31 @@ static inline int sock_map_prog(struct bpf_map *map,
 }
 #endif

+#if defined(CONFIG_XDP_SOCKETS)
+struct xdp_sock;
+struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+#else
+struct xdp_sock;
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+						     u32 key)
+{
+	return NULL;
+}
+
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+				     struct xdp_sock *xs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+#endif
+
 /* verifier prototypes for helper functions called from eBPF programs */
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
@@ -689,9 +721,8 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
 extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
 extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
-extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
-extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;

 /* Shared helpers among cBPF and eBPF. */

--- a/include/linux/bpf_trace.h
+++ b/include/linux/bpf_trace.h
@@ -2,7 +2,6 @@
 #ifndef __LINUX_BPF_TRACE_H__
 #define __LINUX_BPF_TRACE_H__

-#include <trace/events/bpf.h>
 #include <trace/events/xdp.h>

 #endif /* __LINUX_BPF_TRACE_H__ */
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -49,4 +49,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
+#if defined(CONFIG_XDP_SOCKETS)
+BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
+#endif
 #endif
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -173,6 +173,11 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)

 #define BPF_MAX_SUBPROGS 256

+/* per-function (subprog) state, one entry per bpf function in
+ * bpf_verifier_env::subprog_info
+ */
+struct bpf_subprog_info {
+	u32 start; /* insn idx of function entry point */
+	u16 stack_depth; /* max. stack depth used by this function */
+};
+
 /* single container for all structs
 * one verifier_env per bpf_check() call
 */
@@ -191,9 +196,7 @@ struct bpf_verifier_env {
 	bool seen_direct_write;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 	struct bpf_verifier_log log;
-	u32 subprog_starts[BPF_MAX_SUBPROGS];
-	/* computes the stack depth of each bpf function */
-	u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
+	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
 	u32 subprog_cnt;
 };


--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -47,7 +47,9 @@ struct xdp_buff;
 /* Additional register mappings for converted user programs. */
 #define BPF_REG_A	BPF_REG_0
 #define BPF_REG_X	BPF_REG_7
-#define BPF_REG_TMP	BPF_REG_8
+#define BPF_REG_TMP	BPF_REG_2	/* scratch reg */
+#define BPF_REG_D	BPF_REG_8	/* data, callee-saved */
+#define BPF_REG_H	BPF_REG_9	/* hlen, callee-saved */

 /* Kernel hidden auxiliary/helper register for hardening step.
 * Only used by eBPF JITs. It's nothing more than a temporary
@@ -468,7 +470,8 @@ struct bpf_prog {
 				dst_needed:1,	/* Do we need dst entry? */
 				blinded:1,	/* Was blinded */
 				is_func:1,	/* program is a bpf function */
-				kprobe_override:1; /* Do we override a kprobe? */
+				kprobe_override:1, /* Do we override a kprobe? */
+				has_callchain_buf:1; /* callchain buffer allocated? */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */
@@ -759,7 +762,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 * This does not appear to be a real limitation for existing software.
 */
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-			    struct bpf_prog *prog);
+			    struct xdp_buff *xdp, struct bpf_prog *prog);
 int xdp_do_redirect(struct net_device *dev,
 		    struct xdp_buff *xdp,
 		    struct bpf_prog *prog);

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2510,6 +2510,7 @@ void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
+int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
 void unregister_netdevice_many(struct list_head *head);

--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -207,8 +207,9 @@ struct ucred {
 				 * PF_SMC protocol family that
 				 * reuses AF_INET address family
 				 */
+#define AF_XDP		44	/* XDP sockets			*/

-#define AF_MAX		44	/* For now.. */
+#define AF_MAX		45	/* For now.. */

 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -257,6 +258,7 @@ struct ucred {
 #define PF_KCM		AF_KCM
 #define PF_QIPCRTR	AF_QIPCRTR
 #define PF_SMC		AF_SMC
+#define PF_XDP		AF_XDP
 #define PF_MAX		AF_MAX

 /* Maximum queue length specifiable by listen.  */
@@ -338,6 +340,7 @@ struct ucred {
 #define SOL_NFC		280
 #define SOL_KCM		281
 #define SOL_TLS		282
+#define SOL_XDP		283

 /* IPX options */
 #define IPX_TYPE	1

--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max);
 /* Arithmetic and logical ops */
 /* Shift a tnum left (by a fixed shift) */
 struct tnum tnum_lshift(struct tnum a, u8 shift);
-/* Shift a tnum right (by a fixed shift) */
+/* Shift (rsh) a tnum right (by a fixed shift) */
 struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Shift (arsh) a tnum right (by a fixed min_shift) */
+struct tnum tnum_arshift(struct tnum a, u8 min_shift);
 /* Add two tnums, return @a + @b */
 struct tnum tnum_add(struct tnum a, struct tnum b);
 /* Subtract two tnums, return @a - @b */

--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 }

 void xdp_return_frame(struct xdp_frame *xdpf);
+void xdp_return_buff(struct xdp_buff *xdp);

 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);

--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
+/* SPDX-License-Identifier: GPL-2.0
+ * AF_XDP internal functions
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_XDP_SOCK_H
+#define _LINUX_XDP_SOCK_H
+
+#include <linux/mutex.h>
+#include <net/sock.h>
+
+struct net_device;
+struct xsk_queue;
+struct xdp_umem;
+
+struct xdp_sock {
+	/* struct sock must be the first member of struct xdp_sock */
+	struct sock sk;
+	struct xsk_queue *rx;
+	struct net_device *dev;
+	struct xdp_umem *umem;
+	struct list_head flush_node;
+	u16 queue_id;
+	struct xsk_queue *tx ____cacheline_aligned_in_smp;
+	/* Protects multiple processes in the control path */
+	struct mutex mutex;
+	u64 rx_dropped;
+};
+
+struct xdp_buff;
+#ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+void xsk_flush(struct xdp_sock *xs);
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+#else
+static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	return -ENOTSUPP;
+}
+
+static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	return -ENOTSUPP;
+}
+
+static inline void xsk_flush(struct xdp_sock *xs)
+{
+}
+
+static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
+{
+	return false;
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_XDP_SOCK_H */
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM bpf
-
-#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_BPF_H
-
-/* These are only used within the BPF_SYSCALL code */
-#ifdef CONFIG_BPF_SYSCALL
-
-#include <linux/filter.h>
-#include <linux/bpf.h>
-#include <linux/fs.h>
-#include <linux/tracepoint.h>
-
-#define __PROG_TYPE_MAP(FN)	\
-	FN(SOCKET_FILTER)	\
-	FN(KPROBE)		\
-	FN(SCHED_CLS)		\
-	FN(SCHED_ACT)		\
-	FN(TRACEPOINT)		\
-	FN(XDP)			\
-	FN(PERF_EVENT)		\
-	FN(CGROUP_SKB)		\
-	FN(CGROUP_SOCK)		\
-	FN(LWT_IN)		\
-	FN(LWT_OUT)		\
-	FN(LWT_XMIT)
-
-#define __MAP_TYPE_MAP(FN)	\
-	FN(HASH)		\
-	FN(ARRAY)		\
-	FN(PROG_ARRAY)		\
-	FN(PERF_EVENT_ARRAY)	\
-	FN(PERCPU_HASH)		\
-	FN(PERCPU_ARRAY)	\
-	FN(STACK_TRACE)		\
-	FN(CGROUP_ARRAY)	\
-	FN(LRU_HASH)		\
-	FN(LRU_PERCPU_HASH)	\
-	FN(LPM_TRIE)
-
-#define __PROG_TYPE_TP_FN(x)	\
-	TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x);
-#define __PROG_TYPE_SYM_FN(x)	\
-	{ BPF_PROG_TYPE_##x, #x },
-#define __PROG_TYPE_SYM_TAB	\
-	__PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 }
-__PROG_TYPE_MAP(__PROG_TYPE_TP_FN)
-
-#define __MAP_TYPE_TP_FN(x)	\
-	TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x);
-#define __MAP_TYPE_SYM_FN(x)	\
-	{ BPF_MAP_TYPE_##x, #x },
-#define __MAP_TYPE_SYM_TAB	\
-	__MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 }
-__MAP_TYPE_MAP(__MAP_TYPE_TP_FN)
-
-DECLARE_EVENT_CLASS(bpf_prog_event,
-
-	TP_PROTO(const struct bpf_prog *prg),
-
-	TP_ARGS(prg),
-
-	TP_STRUCT__entry(
-		__array(u8, prog_tag, 8)
-		__field(u32, type)
-	),
-
-	TP_fast_assign(
-		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
-		memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
-		__entry->type = prg->type;
-	),
-
-	TP_printk("prog=%s type=%s",
-		  __print_hex_str(__entry->prog_tag, 8),
-		  __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB))
-);
-
-DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type,
-
-	TP_PROTO(const struct bpf_prog *prg),
-
-	TP_ARGS(prg)
-);
-
-DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu,
-
-	TP_PROTO(const struct bpf_prog *prg),
-
-	TP_ARGS(prg)
-);
-
-TRACE_EVENT(bpf_prog_load,
-
-	TP_PROTO(const struct bpf_prog *prg, int ufd),
-
-	TP_ARGS(prg, ufd),
-
-	TP_STRUCT__entry(
-		__array(u8, prog_tag, 8)
-		__field(u32, type)
-		__field(int, ufd)
-	),
-
-	TP_fast_assign(
-		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
-		memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
-		__entry->type = prg->type;
-		__entry->ufd  = ufd;
-	),
-
-	TP_printk("prog=%s type=%s ufd=%d",
-		  __print_hex_str(__entry->prog_tag, 8),
-		  __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB),
-		  __entry->ufd)
-);
-
-TRACE_EVENT(bpf_map_create,
-
-	TP_PROTO(const struct bpf_map *map, int ufd),
-
-	TP_ARGS(map, ufd),
-
-	TP_STRUCT__entry(
-		__field(u32, type)
-		__field(u32, size_key)
-		__field(u32, size_value)
-		__field(u32, max_entries)
-		__field(u32, flags)
-		__field(int, ufd)
-	),
-
-	TP_fast_assign(
-		__entry->type        = map->map_type;
-		__entry->size_key    = map->key_size;
-		__entry->size_value  = map->value_size;
-		__entry->max_entries = map->max_entries;
-		__entry->flags       = map->map_flags;
-		__entry->ufd         = ufd;
-	),
-
-	TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x",
-		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-		  __entry->ufd, __entry->size_key, __entry->size_value,
-		  __entry->max_entries, __entry->flags)
-);
-
-DECLARE_EVENT_CLASS(bpf_obj_prog,
-
-	TP_PROTO(const struct bpf_prog *prg, int ufd,
-		 const struct filename *pname),
-
-	TP_ARGS(prg, ufd, pname),
-
-	TP_STRUCT__entry(
-		__array(u8, prog_tag, 8)
-		__field(int, ufd)
-		__string(path, pname->name)
-	),
-
-	TP_fast_assign(
-		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
-		memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
-		__assign_str(path, pname->name);
-		__entry->ufd = ufd;
-	),
-
-	TP_printk("prog=%s path=%s ufd=%d",
-		  __print_hex_str(__entry->prog_tag, 8),
-		  __get_str(path), __entry->ufd)
-);
-
-DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog,
-
-	TP_PROTO(const struct bpf_prog *prg, int ufd,
-		 const struct filename *pname),
-
-	TP_ARGS(prg, ufd, pname)
-);
-
-DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog,
-
-	TP_PROTO(const struct bpf_prog *prg, int ufd,
-		 const struct filename *pname),
-
-	TP_ARGS(prg, ufd, pname)
-);
-
-DECLARE_EVENT_CLASS(bpf_obj_map,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const struct filename *pname),
-
-	TP_ARGS(map, ufd, pname),
-
-	TP_STRUCT__entry(
-		__field(u32, type)
-		__field(int, ufd)
-		__string(path, pname->name)
-	),
-
-	TP_fast_assign(
-		__assign_str(path, pname->name);
-		__entry->type = map->map_type;
-		__entry->ufd  = ufd;
-	),
-
-	TP_printk("map type=%s ufd=%d path=%s",
-		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-		  __entry->ufd, __get_str(path))
-);
-
-DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const struct filename *pname),
-
-	TP_ARGS(map, ufd, pname)
-);
-
-DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const struct filename *pname),
-
-	TP_ARGS(map, ufd, pname)
-);
-
-DECLARE_EVENT_CLASS(bpf_map_keyval,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const void *key, const void *val),
-
-	TP_ARGS(map, ufd, key, val),
-
-	TP_STRUCT__entry(
-		__field(u32, type)
-		__field(u32, key_len)
-		__dynamic_array(u8, key, map->key_size)
-		__field(bool, key_trunc)
-		__field(u32, val_len)
-		__dynamic_array(u8, val, map->value_size)
-		__field(bool, val_trunc)
-		__field(int, ufd)
-	),
-
-	TP_fast_assign(
-		memcpy(__get_dynamic_array(key), key, map->key_size);
-		memcpy(__get_dynamic_array(val), val, map->value_size);
-		__entry->type      = map->map_type;
-		__entry->key_len   = min(map->key_size, 16U);
-		__entry->key_trunc = map->key_size != __entry->key_len;
-		__entry->val_len   = min(map->value_size, 16U);
-		__entry->val_trunc = map->value_size != __entry->val_len;
-		__entry->ufd       = ufd;
-	),
-
-	TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]",
-		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-		  __entry->ufd,
-		  __print_hex(__get_dynamic_array(key), __entry->key_len),
-		  __entry->key_trunc ? " ..." : "",
-		  __print_hex(__get_dynamic_array(val), __entry->val_len),
-		  __entry->val_trunc ? " ..." : "")
-);
-
-DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const void *key, const void *val),
-
-	TP_ARGS(map, ufd, key, val)
-);
-
-DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const void *key, const void *val),
-
-	TP_ARGS(map, ufd, key, val)
-);
-
-TRACE_EVENT(bpf_map_delete_elem,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const void *key),
-
-	TP_ARGS(map, ufd, key),
-
-	TP_STRUCT__entry(
-		__field(u32, type)
-		__field(u32, key_len)
-		__dynamic_array(u8, key, map->key_size)
-		__field(bool, key_trunc)
-		__field(int, ufd)
-	),
-
-	TP_fast_assign(
-		memcpy(__get_dynamic_array(key), key, map->key_size);
-		__entry->type      = map->map_type;
-		__entry->key_len   = min(map->key_size, 16U);
-		__entry->key_trunc = map->key_size != __entry->key_len;
-		__entry->ufd       = ufd;
-	),
-
-	TP_printk("map type=%s ufd=%d key=[%s%s]",
-		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-		  __entry->ufd,
-		  __print_hex(__get_dynamic_array(key), __entry->key_len),
-		  __entry->key_trunc ? " ..." : "")
-);
-
-TRACE_EVENT(bpf_map_next_key,
-
-	TP_PROTO(const struct bpf_map *map, int ufd,
-		 const void *key, const void *key_next),
-
-	TP_ARGS(map, ufd, key, key_next),
-
-	TP_STRUCT__entry(
-		__field(u32, type)
-		__field(u32, key_len)
-		__dynamic_array(u8, key, map->key_size)
-		__dynamic_array(u8, nxt, map->key_size)
-		__field(bool, key_trunc)
-		__field(bool, key_null)
-		__field(int, ufd)
-	),
-
-	TP_fast_assign(
-		if (key)
-			memcpy(__get_dynamic_array(key), key, map->key_size);
-		__entry->key_null = !key;
-		memcpy(__get_dynamic_array(nxt), key_next, map->key_size);
-		__entry->type      = map->map_type;
-		__entry->key_len   = min(map->key_size, 16U);
-		__entry->key_trunc = map->key_size != __entry->key_len;
-		__entry->ufd       = ufd;
-	),
-
-	TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
-		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-		  __entry->ufd,
-		  __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key),
-							   __entry->key_len),
-		  __entry->key_trunc && !__entry->key_null ? " ..." : "",
-		  __print_hex(__get_dynamic_array(nxt), __entry->key_len),
-		  __entry->key_trunc ? " ..." : "")
-);
-#endif /* CONFIG_BPF_SYSCALL */
-#endif /* _TRACE_BPF_H */
-
-#include <trace/define_trace.h>
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_DEVMAP,
 	BPF_MAP_TYPE_SOCKMAP,
 	BPF_MAP_TYPE_CPUMAP,
+	BPF_MAP_TYPE_XSKMAP,
 };

 enum bpf_prog_type {
@@ -828,12 +829,12 @@ union bpf_attr {
 *
 * 		Also, be aware that the newer helper
 * 		**bpf_perf_event_read_value**\ () is recommended over
- * 		**bpf_perf_event_read*\ () in general. The latter has some ABI
+ * 		**bpf_perf_event_read**\ () in general. The latter has some ABI
 * 		quirks where error and counter value are used as a return code
 * 		(which is wrong to do since ranges may overlap). This issue is
- * 		fixed with bpf_perf_event_read_value(), which at the same time
- * 		provides more features over the **bpf_perf_event_read**\ ()
- * 		interface. Please refer to the description of
+ * 		fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * 		time provides more features over the **bpf_perf_event_read**\
+ * 		() interface. Please refer to the description of
 * 		**bpf_perf_event_read_value**\ () for details.
 * 	Return
 * 		The value of the perf event counter read from the map, or a
@@ -1361,7 +1362,7 @@ union bpf_attr {
 * 	Return
 * 		0
 *
- * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
 * 	Description
 * 		Emulate a call to **setsockopt()** on the socket associated to
 * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1435,7 +1436,7 @@ union bpf_attr {
 * 	Return
 * 		**SK_PASS** on success, or **SK_DROP** on error.
 *
- * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
 * 	Description
 * 		Add an entry to, or update a *map* referencing sockets. The
 * 		*skops* is used as a new value for the entry associated to
@@ -1533,7 +1534,7 @@ union bpf_attr {
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
 * 	Description
 * 		For an eBPF program attached to a perf event, retrieve the
 * 		value of the event counter associated to *ctx* and store it in
@@ -1544,7 +1545,7 @@ union bpf_attr {
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
- * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
 * 	Description
 * 		Emulate a call to **getsockopt()** on the socket associated to
 * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1588,7 +1589,7 @@ union bpf_attr {
 * 	Return
 * 		0
 *
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
 * 	Description
 * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
 * 		for the full TCP socket associated to *bpf_sock_ops* to
@@ -1721,7 +1722,7 @@ union bpf_attr {
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
- * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
 * 	Description
 * 		Bind the socket associated to *ctx* to the address pointed by
 * 		*addr*, of length *addr_len*. This allows for making outgoing
@@ -1767,6 +1768,64 @@ union bpf_attr {
 * 		**CONFIG_XFRM** configuration option.
 * 	Return
 * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * 	Description
+ * 		Return a user or a kernel stack in bpf program provided buffer.
+ * 		To achieve this, the helper needs *regs*, which is a pointer
+ * 		to the context on which the tracing program is executed.
+ * 		To store the stacktrace, the bpf program provides *buf* with
+ * 		a nonnegative *size*.
+ *
+ * 		The last argument, *flags*, holds the number of stack frames to
+ * 		skip (from 0 to 255), masked with
+ * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * 		the following flags:
+ *
+ * 		**BPF_F_USER_STACK**
+ * 			Collect a user space stack instead of a kernel stack.
+ * 		**BPF_F_USER_BUILD_ID**
+ * 			Collect buildid+offset instead of ips for user stack,
+ * 			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * 		**bpf_get_stack**\ () can collect up to
+ * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * 		to a sufficiently large buffer size. Note that
+ * 		this limit can be controlled with the **sysctl** program, and
+ * 		that it should be manually increased in order to profile long
+ * 		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * 		::
+ *
+ * 			# sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * 	Return
+ * 		a non-negative value equal to or less than *size* on success, or
+ * 		a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * 	Description
+ * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * 		it provides an easy way to load *len* bytes from *offset*
+ * 		from the packet associated to *skb*, into the buffer pointed
+ * 		by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * 		a fifth argument *start_header* exists in order to select a
+ * 		base offset to start from. *start_header* can be one of:
+ *
+ * 		**BPF_HDR_START_MAC**
+ * 			Base offset to load data from is *skb*'s mac header.
+ * 		**BPF_HDR_START_NET**
+ * 			Base offset to load data from is *skb*'s network header.
+ *
+ * 		In general, "direct packet access" is the preferred method to
+ * 		access packet data, however, this helper is in particular useful
+ * 		in socket filters where *skb*\ **->data** does not always point
+ * 		to the start of the mac header and where "direct packet access"
+ * 		is not available.
+ *
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1835,7 +1894,9 @@ union bpf_attr {
 	FN(msg_pull_data),		\
 	FN(bind),			\
 	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),
+	FN(skb_get_xfrm_state),		\
+	FN(get_stack),			\
+	FN(skb_load_bytes_relative),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@@ -1869,11 +1930,14 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)

-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK		0xffULL
 #define BPF_F_USER_STACK		(1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
 #define BPF_F_REUSE_STACKID		(1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID		(1ULL << 11)

 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
@@ -1893,6 +1957,12 @@ enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
 };

+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+	BPF_HDR_START_MAC,	/* base offset is the skb's mac header */
+	BPF_HDR_START_NET,	/* base offset is the skb's network header */
+};
+
 /* user accessible mirror of in-kernel sk_buff.
 * new fields can only be added to the end of this structure
 */

--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+ *
+ * if_xdp: XDP socket user-space interface
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ *	      Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef _LINUX_IF_XDP_H
+#define _LINUX_IF_XDP_H
+
+#include <linux/types.h>
+
+/* Options for the sxdp_flags field */
+#define XDP_SHARED_UMEM 1
+
+struct sockaddr_xdp {
+	__u16 sxdp_family;
+	__u32 sxdp_ifindex;
+	__u32 sxdp_queue_id;
+	__u32 sxdp_shared_umem_fd; /* NOTE(review): presumably only used with XDP_SHARED_UMEM - confirm */
+	__u16 sxdp_flags; /* option bits, see XDP_SHARED_UMEM */
+};
+
+/* XDP socket options */
+#define XDP_RX_RING			1
+#define XDP_TX_RING			2
+#define XDP_UMEM_REG			3
+#define XDP_UMEM_FILL_RING		4
+#define XDP_UMEM_COMPLETION_RING	5
+#define XDP_STATISTICS			6
+
+struct xdp_umem_reg {
+	__u64 addr; /* Start of packet data area */
+	__u64 len; /* Length of packet data area */
+	__u32 frame_size; /* Frame size */
+	__u32 frame_headroom; /* Frame head room */
+};
+
+struct xdp_statistics {
+	__u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+	__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+};
+
+/* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_RING			  0
+#define XDP_PGOFF_TX_RING		 0x80000000
+#define XDP_UMEM_PGOFF_FILL_RING	0x100000000
+#define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000
+
+struct xdp_desc {
+	__u32 idx;
+	__u32 len;
+	__u16 offset;
+	__u8 flags;
+	__u8 padding[5]; /* pads the 4+4+2+1 bytes above out to 16 bytes */
+};
+
+struct xdp_ring {
+	__u32 producer __attribute__((aligned(64))); /* each index starts on its own 64-byte boundary */
+	__u32 consumer __attribute__((aligned(64)));
+};
+
+/* Used for the RX and TX queues for packets */
+struct xdp_rxtx_ring {
+	struct xdp_ring ptrs;
+	struct xdp_desc desc[0] __attribute__((aligned(64)));
+};
+
+/* Used for the fill and completion queues for buffers */
+struct xdp_umem_ring {
+	struct xdp_ring ptrs;
+	__u32 desc[0] __attribute__((aligned(64)));
+};
+
+#endif /* _LINUX_IF_XDP_H */
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -8,6 +8,9 @@ obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
+ifeq ($(CONFIG_XDP_SOCKETS),y)
+obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
+endif
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 ifeq ($(CONFIG_STREAM_PARSER),y)
 ifeq ($(CONFIG_INET),y)

--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -31,6 +31,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
+#include <linux/perf_event.h>

 #include <asm/unaligned.h>

@@ -633,23 +634,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
 		break;

-	case BPF_LD | BPF_ABS | BPF_W:
-	case BPF_LD | BPF_ABS | BPF_H:
-	case BPF_LD | BPF_ABS | BPF_B:
-		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
-		break;
-
-	case BPF_LD | BPF_IND | BPF_W:
-	case BPF_LD | BPF_IND | BPF_H:
-	case BPF_LD | BPF_IND | BPF_B:
-		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
-		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
-		break;
-
 	case BPF_LD | BPF_IMM | BPF_DW:
 		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
 		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
@@ -890,14 +874,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
 	INSN_3(LDX, MEM, W),			\
 	INSN_3(LDX, MEM, DW),			\
 	/*   Immediate based. */		\
-	INSN_3(LD, IMM, DW),			\
-	/*   Misc (old cBPF carry-over). */	\
-	INSN_3(LD, ABS, B),			\
-	INSN_3(LD, ABS, H),			\
-	INSN_3(LD, ABS, W),			\
-	INSN_3(LD, IND, B),			\
-	INSN_3(LD, IND, H),			\
-	INSN_3(LD, IND, W)
+	INSN_3(LD, IMM, DW)

 bool bpf_opcode_in_insntable(u8 code)
 {
@@ -907,6 +884,13 @@ bool bpf_opcode_in_insntable(u8 code)
 		[0 ... 255] = false,
 		/* Now overwrite non-defaults ... */
 		BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
+		/* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
+		[BPF_LD | BPF_ABS | BPF_B] = true,
+		[BPF_LD | BPF_ABS | BPF_H] = true,
+		[BPF_LD | BPF_ABS | BPF_W] = true,
+		[BPF_LD | BPF_IND | BPF_B] = true,
+		[BPF_LD | BPF_IND | BPF_H] = true,
+		[BPF_LD | BPF_IND | BPF_W] = true,
 	};
 #undef BPF_INSN_3_TBL
 #undef BPF_INSN_2_TBL
@@ -937,8 +921,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
 #undef BPF_INSN_3_LBL
 #undef BPF_INSN_2_LBL
 	u32 tail_call_cnt = 0;
-	void *ptr;
-	int off;

 #define CONT	 ({ insn++; goto select_insn; })
 #define CONT_JMP ({ insn++; goto select_insn; })
@@ -1265,67 +1247,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
 		atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
 			     (DST + insn->off));
 		CONT;
-	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
-		off = IMM;
-load_word:
-		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
-		 * appearing in the programs where ctx == skb
-		 * (see may_access_skb() in the verifier). All programs
-		 * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
-		 * bpf_convert_filter() saves it in BPF_R6, internal BPF
-		 * verifier will check that BPF_R6 == ctx.
-		 *
-		 * BPF_ABS and BPF_IND are wrappers of function calls,
-		 * so they scratch BPF_R1-BPF_R5 registers, preserve
-		 * BPF_R6-BPF_R9, and store return value into BPF_R0.
-		 *
-		 * Implicit input:
-		 *   ctx == skb == BPF_R6 == CTX
-		 *
-		 * Explicit input:
-		 *   SRC == any register
-		 *   IMM == 32-bit immediate
-		 *
-		 * Output:
-		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
-		 */
-
-		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
-		if (likely(ptr != NULL)) {
-			BPF_R0 = get_unaligned_be32(ptr);
-			CONT;
-		}
-
-		return 0;
-	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
-		off = IMM;
-load_half:
-		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
-		if (likely(ptr != NULL)) {
-			BPF_R0 = get_unaligned_be16(ptr);
-			CONT;
-		}
-
-		return 0;
-	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
-		off = IMM;
-load_byte:
-		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
-		if (likely(ptr != NULL)) {
-			BPF_R0 = *(u8 *)ptr;
-			CONT;
-		}
-
-		return 0;
-	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
-		off = IMM + SRC;
-		goto load_word;
-	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
-		off = IMM + SRC;
-		goto load_half;
-	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
-		off = IMM + SRC;
-		goto load_byte;

 	default_label:
 		/* If we ever reach this, we have a bug somewhere. Die hard here
@@ -1722,6 +1643,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 	aux = container_of(work, struct bpf_prog_aux, work);
 	if (bpf_prog_is_dev_bound(aux))
 		bpf_prog_offload_destroy(aux->prog);
+#ifdef CONFIG_PERF_EVENTS
+	if (aux->prog->has_callchain_buf)
+		put_callchain_buffers();
+#endif
 	for (i = 0; i < aux->func_cnt; i++)
 		bpf_jit_free(aux->func[i]);
 	if (aux->func_cnt) {
@@ -1794,6 +1719,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 {
 	return -ENOTSUPP;
 }
+EXPORT_SYMBOL_GPL(bpf_event_output);

 /* Always built-in helper functions. */
 const struct bpf_func_proto bpf_tail_call_proto = {
@@ -1840,9 +1766,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 #include <linux/bpf_trace.h>

 EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
-
-/* These are only used within the BPF_SYSCALL code */
-#ifdef CONFIG_BPF_SYSCALL
-EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
-#endif
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 	ret = bpf_obj_do_pin(pname, raw, type);
 	if (ret != 0)
 		bpf_any_put(raw, type);
-	if ((trace_bpf_obj_pin_prog_enabled() ||
-	     trace_bpf_obj_pin_map_enabled()) && !ret) {
-		if (type == BPF_TYPE_PROG)
-			trace_bpf_obj_pin_prog(raw, ufd, pname);
-		if (type == BPF_TYPE_MAP)
-			trace_bpf_obj_pin_map(raw, ufd, pname);
-	}
 out:
 	putname(pname);
 	return ret;
@@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 	else
 		goto out;

-	if (ret < 0) {
+	if (ret < 0)
 		bpf_any_put(raw, type);
-	} else if (trace_bpf_obj_get_prog_enabled() ||
-		   trace_bpf_obj_get_map_enabled()) {
-		if (type == BPF_TYPE_PROG)
-			trace_bpf_obj_get_prog(raw, ret, pname);
-		if (type == BPF_TYPE_MAP)
-			trace_bpf_obj_get_map(raw, ret, pname);
-	}
 out:
 	putname(pname);
 	return ret;

--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
 *
 * This software is licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
 	struct bpf_prog_offload *offload;
 	bool ret;

-	if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
+	if (!bpf_prog_is_dev_bound(prog->aux))
 		return false;
+	if (!bpf_map_is_dev_bound(map))
+		return bpf_map_offload_neutral(map);

 	down_read(&bpf_devs_lock);
 	offload = prog->aux->offload;

--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -262,16 +262,11 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
 	return ret;
 }

-static void stack_map_get_build_id_offset(struct bpf_map *map,
-					  struct stack_map_bucket *bucket,
+static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 					  u64 *ips, u32 trace_nr, bool user)
 {
 	int i;
 	struct vm_area_struct *vma;
-	struct bpf_stack_build_id *id_offs;
-
-	bucket->nr = trace_nr;
-	id_offs = (struct bpf_stack_build_id *)bucket->data;

 	/*
 	 * We cannot do up_read() in nmi context, so build_id lookup is
@@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 			pcpu_freelist_pop(&smap->freelist);
 		if (unlikely(!new_bucket))
 			return -ENOMEM;
-		stack_map_get_build_id_offset(map, new_bucket, ips,
-					      trace_nr, user);
+		new_bucket->nr = trace_nr;
+		stack_map_get_build_id_offset(
+			(struct bpf_stack_build_id *)new_bucket->data,
+			ips, trace_nr, user);
 		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
 		if (hash_matches && bucket->nr == trace_nr &&
 		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@@ -405,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };

+/* bpf_get_stack - copy a kernel or user stack trace into a program buffer.
+ * @regs:  register state handed to get_perf_callchain()
+ * @buf:   destination buffer
+ * @size:  size of @buf in bytes; must be a multiple of the element size
+ * @flags: number of frames to skip (BPF_F_SKIP_FIELD_MASK) combined with
+ *         BPF_F_USER_STACK and BPF_F_USER_BUILD_ID
+ *
+ * Returns the number of bytes stored in @buf; the unused tail of @buf is
+ * zeroed. On failure the whole buffer is zeroed and -EINVAL (unknown flags,
+ * build id requested for a kernel stack, or misaligned @size) or -EFAULT
+ * (no callchain, or fewer frames than requested to skip) is returned.
+ */
+BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
+	   u64, flags)
+{
+	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+	bool user = flags & BPF_F_USER_STACK;
+	struct perf_callchain_entry *trace;
+	bool kernel = !user;
+	int err = -EINVAL;
+	u64 *ips;
+
+	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+			       BPF_F_USER_BUILD_ID)))
+		goto clear;
+	if (kernel && user_build_id)	/* build ids are only collected for user stacks */
+		goto clear;
+
+	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
+					    : sizeof(u64);
+	if (unlikely(size % elem_size))	/* buffer must hold a whole number of elements */
+		goto clear;
+
+	num_elem = size / elem_size;
+	if (sysctl_perf_event_max_stack < num_elem)	/* buffer holds more than the stack limit: start at 0 */
+		init_nr = 0;
+	else	/* leave exactly num_elem entries of room below the stack limit */
+		init_nr = sysctl_perf_event_max_stack - num_elem;
+	trace = get_perf_callchain(regs, init_nr, kernel, user,
+				   sysctl_perf_event_max_stack, false, false);
+	if (unlikely(!trace))
+		goto err_fault;
+
+	trace_nr = trace->nr - init_nr;
+	if (trace_nr < skip)
+		goto err_fault;
+
+	trace_nr -= skip;
+	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;	/* clamp to buffer capacity */
+	copy_len = trace_nr * elem_size;
+	ips = trace->ip + skip + init_nr;
+	if (user && user_build_id)
+		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
+	else
+		memcpy(buf, ips, copy_len);
+
+	if (size > copy_len)	/* zero the part of the buffer that was not filled */
+		memset(buf + copy_len, 0, size - copy_len);
+	return copy_len;
+
+err_fault:
+	err = -EFAULT;
+clear:
+	memset(buf, 0, size);	/* never hand back stale buffer contents on error */
+	return err;
+}
+
+const struct bpf_func_proto bpf_get_stack_proto = {
+	.func		= bpf_get_stack,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {

--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map)
 {
 	__bpf_map_put(map, true);
 }
+EXPORT_SYMBOL_GPL(bpf_map_put);

 void bpf_map_put_with_uref(struct bpf_map *map)
 {
@@ -503,7 +504,6 @@ static int map_create(union bpf_attr *attr)
 		return err;
 	}

-	trace_bpf_map_create(map, err);
 	return err;

 free_map:
@@ -544,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 		atomic_inc(&map->usercnt);
 	return map;
 }
+EXPORT_SYMBOL_GPL(bpf_map_inc);

 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 {
@@ -663,7 +664,6 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (copy_to_user(uvalue, value, value_size) != 0)
 		goto free_value;

-	trace_bpf_map_lookup_elem(map, ufd, key, value);
 	err = 0;

 free_value:
@@ -760,8 +760,6 @@ static int map_update_elem(union bpf_attr *attr)
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
 out:
-	if (!err)
-		trace_bpf_map_update_elem(map, ufd, key, value);
 free_value:
 	kfree(value);
 free_key:
@@ -814,8 +812,6 @@ static int map_delete_elem(union bpf_attr *attr)
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
 out:
-	if (!err)
-		trace_bpf_map_delete_elem(map, ufd, key);
 	kfree(key);
 err_put:
 	fdput(f);
@@ -879,7 +875,6 @@ static int map_get_next_key(union bpf_attr *attr)
 	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
 		goto free_next_key;

-	trace_bpf_map_next_key(map, ufd, key, next_key);
 	err = 0;

 free_next_key:
@@ -1027,7 +1022,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
 		int i;

-		trace_bpf_prog_put_rcu(prog);
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);

@@ -1194,11 +1188,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
 				       bool attach_drv)
 {
-	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);
-
-	if (!IS_ERR(prog))
-		trace_bpf_prog_get_type(prog);
-	return prog;
+	return __bpf_prog_get(ufd, &type, attach_drv);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);

@@ -1373,7 +1363,6 @@ static int bpf_prog_load(union bpf_attr *attr)
 	}

 	bpf_prog_kallsyms_add(prog);
-	trace_bpf_prog_load(prog, err);
 	return err;

 free_used_maps:

--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
 	return TNUM(a.value >> shift, a.mask >> shift);
 }

+struct tnum tnum_arshift(struct tnum a, u8 min_shift)
+{
+	/* If a.value is negative, an arithmetic shift by the minimum shift
+	 * yields a larger negative offset than shifting by more.
+	 * If a.value is nonnegative, an arithmetic shift by the minimum
+	 * shift yields a larger positive offset than shifting by more.
+	 * Both value and mask are cast to s64 before shifting.
+	 */
+	return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
+}
+
 struct tnum tnum_add(struct tnum a, struct tnum b)
 {
 	u64 sm, sv, sigma, chi, mu;

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
+// SPDX-License-Identifier: GPL-2.0
+/* XSKMAP used for AF_XDP sockets
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/capability.h>
+#include <net/xdp_sock.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+struct xsk_map {
+	struct bpf_map map;
+	struct xdp_sock **xsk_map; /* max_entries socket pointers; NULL marks an empty slot */
+	struct list_head __percpu *flush_list; /* per-CPU list of sockets waiting for xsk_flush() */
+};
+
+/* Allocate an XSKMAP.
+ *
+ * The caller needs CAP_NET_ADMIN (-EPERM otherwise). max_entries must be
+ * non-zero, key and value size must both be 4 and only BPF_F_NUMA_NODE,
+ * BPF_F_RDONLY and BPF_F_WRONLY may be set in map_flags, else -EINVAL.
+ * Returns the embedded bpf_map on success or an ERR_PTR() on failure.
+ */
+static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
+{
+	int cpu, err = -EINVAL;
+	struct xsk_map *m;
+	u64 cost;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size != 4 ||
+	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
+		return ERR_PTR(-EINVAL);
+
+	m = kzalloc(sizeof(*m), GFP_USER);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&m->map, attr);
+
+	/* One socket pointer per entry plus one list head per possible CPU. */
+	cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
+	cost += sizeof(struct list_head) * num_possible_cpus();
+	if (cost >= U32_MAX - PAGE_SIZE)	/* err is still -EINVAL here */
+		goto free_m;
+
+	m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	/* Note: returns -EPERM if the map size is larger than the memlock limit */
+	err = bpf_map_precharge_memlock(m->map.pages);
+	if (err)
+		goto free_m;
+
+	err = -ENOMEM;	/* every failure from here on is an allocation failure */
+
+	m->flush_list = alloc_percpu(struct list_head);
+	if (!m->flush_list)
+		goto free_m;
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
+
+	m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
+					sizeof(struct xdp_sock *),
+					m->map.numa_node);
+	if (!m->xsk_map)
+		goto free_percpu;
+	return &m->map;
+
+free_percpu:
+	free_percpu(m->flush_list);
+free_m:
+	kfree(m);
+	return ERR_PTR(err);
+}
+
+/* Tear down an XSKMAP: drop the reference held on every socket still stored
+ * in a slot, then free the per-CPU flush lists, the slot array and the map.
+ */
+static void xsk_map_free(struct bpf_map *map)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	int i;
+
+	/* NOTE(review): presumably waits for in-flight users of the map
+	 * before the slots are released - confirm.
+	 */
+	synchronize_net();
+
+	for (i = 0; i < map->max_entries; i++) {
+		struct xdp_sock *xs;
+
+		xs = m->xsk_map[i];
+		if (!xs)	/* empty slot */
+			continue;
+
+		sock_put((struct sock *)xs);
+	}
+
+	free_percpu(m->flush_list);
+	bpf_map_area_free(m->xsk_map);
+	kfree(m);
+}
+
+/* Iterate over the map indices.
+ *
+ * A NULL or out-of-range @key restarts the walk at index 0. For the last
+ * valid index -ENOENT is returned; otherwise *@next_key is @key + 1 and
+ * 0 is returned. Slots are not inspected, so empty ones are reported too.
+ */
+static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	u32 index = key ? *(u32 *)key : U32_MAX;	/* no key: treat as out of range */
+	u32 *next = next_key;
+
+	if (index >= m->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == m->map.max_entries - 1)
+		return -ENOENT;
+	*next = index + 1;
+	return 0;
+}
+
+/* Return the socket stored at index @key, or NULL if @key is out of range
+ * or the slot is empty. The slot is read once with READ_ONCE().
+ */
+struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *xs;
+
+	if (key >= map->max_entries)
+		return NULL;
+
+	xs = READ_ONCE(m->xsk_map[key]);
+	return xs;
+}
+
+/* Pass @xdp to socket @xs through xsk_rcv() and, on success, queue @xs on
+ * this CPU's flush list so that __xsk_map_flush() will flush it later.
+ * Returns 0 on success or the error from xsk_rcv().
+ */
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	int err;
+
+	err = xsk_rcv(xs, xdp);
+	if (err)
+		return err;
+
+	/* A NULL prev means "not queued": __xsk_map_flush() resets it. */
+	if (!xs->flush_node.prev)
+		list_add(&xs->flush_node, flush_list);
+
+	return 0;
+}
+
+/* Call xsk_flush() on every socket queued on this CPU's flush list and
+ * unlink it from the list.
+ */
+void __xsk_map_flush(struct bpf_map *map)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	struct xdp_sock *xs, *tmp;
+
+	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+		xsk_flush(xs);
+		__list_del(xs->flush_node.prev, xs->flush_node.next);
+		xs->flush_node.prev = NULL;	/* marks the socket as not queued for __xsk_map_redirect() */
+	}
+}
+
+/* Generic map_lookup_elem callback: always returns NULL. The slots are
+ * read through __xsk_map_lookup_elem() instead.
+ */
+static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return NULL;
+}
+
+/* Store the socket referred to by the file descriptor in @value at index
+ * @key, replacing whatever was there.
+ *
+ * Returns 0 on success, -EINVAL if @map_flags is above BPF_EXIST, -E2BIG if
+ * the index is out of range, -EEXIST for BPF_NOEXIST, the error reported by
+ * sockfd_lookup() for a bad fd, or -EOPNOTSUPP if the socket is not PF_XDP
+ * or xsk_is_setup_for_bpf_map() rejects it.
+ */
+static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
+			       u64 map_flags)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	u32 i = *(u32 *)key, fd = *(u32 *)value;
+	struct xdp_sock *xs, *old_xs;
+	struct socket *sock;
+	int err;
+
+	if (unlikely(map_flags > BPF_EXIST))
+		return -EINVAL;
+	if (unlikely(i >= m->map.max_entries))
+		return -E2BIG;
+	/* BPF_NOEXIST is refused whether or not the slot is occupied. */
+	if (unlikely(map_flags == BPF_NOEXIST))
+		return -EEXIST;
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		return err;
+
+	if (sock->sk->sk_family != PF_XDP) {
+		sockfd_put(sock);
+		return -EOPNOTSUPP;
+	}
+
+	xs = (struct xdp_sock *)sock->sk;
+
+	if (!xsk_is_setup_for_bpf_map(xs)) {
+		sockfd_put(sock);
+		return -EOPNOTSUPP;
+	}
+
+	/* Reference owned by the map slot; released by sock_put() when the
+	 * entry is replaced or deleted, or when the map is freed.
+	 */
+	sock_hold(sock->sk);
+
+	old_xs = xchg(&m->xsk_map[i], xs);
+	if (old_xs) {
+		/* Make sure we've flushed everything. */
+		synchronize_net();
+		sock_put((struct sock *)old_xs);
+	}
+
+	sockfd_put(sock);	/* drop the reference taken by sockfd_lookup() */
+	return 0;
+}
+
+/* Clear the slot at index @key and drop the map's reference on the socket
+ * that was stored there, if any.
+ *
+ * Returns -EINVAL if the index is out of range, 0 otherwise (also when the
+ * slot was already empty).
+ */
+static int xsk_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *old_xs;
+	int k = *(u32 *)key;
+
+	if (k >= map->max_entries)
+		return -EINVAL;
+
+	old_xs = xchg(&m->xsk_map[k], NULL);
+	if (old_xs) {
+		/* Make sure we've flushed everything. */
+		synchronize_net();
+		sock_put((struct sock *)old_xs);
+	}
+
+	return 0;
+}
+
+/* Map operations table for the XSKMAP implemented in this file. */
+const struct bpf_map_ops xsk_map_ops = {
+	.map_alloc = xsk_map_alloc,
+	.map_free = xsk_map_free,
+	.map_get_next_key = xsk_map_get_next_key,
+	.map_lookup_elem = xsk_map_lookup_elem,	/* stub: always returns NULL */
+	.map_update_elem = xsk_map_update_elem,
+	.map_delete_elem = xsk_map_delete_elem,
+};
+
+
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -20,6 +20,7 @@
 #include "trace.h"

 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

 /**
 * trace_call_bpf - invoke BPF program
@@ -474,8 +475,6 @@ BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct cgroup *cgrp;

-	if (unlikely(in_interrupt()))
-		return -EINVAL;
 	if (unlikely(idx >= array->map.max_entries))
 		return -E2BIG;

@@ -577,6 +576,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto;
 	case BPF_FUNC_perf_event_read_value:
 		return &bpf_perf_event_read_value_proto;
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
@@ -664,6 +665,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };

+/* Tracepoint flavour of bpf_get_stack(): the pt_regs pointer is read from
+ * the start of @tp_buff and everything else is forwarded unchanged.
+ */
+BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
+	   u64, flags)
+{
+	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
+	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			     (unsigned long) size, flags, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_tp = {
+	.func		= bpf_get_stack_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -672,6 +692,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_tp;
 	default:
 		return tracing_func_proto(func_id, prog);
 	}
@@ -734,6 +756,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_tp;
 	case BPF_FUNC_perf_prog_read_value:
 		return &bpf_perf_prog_read_value_proto;
 	default:
@@ -744,7 +768,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 /*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output and/or bpf_get_stack_id
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
 */
 static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
@@ -787,6 +811,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };

+/* Raw tracepoint flavour of bpf_get_stack(): @args is not used; the
+ * registers are captured with perf_fetch_caller_regs() into this CPU's
+ * bpf_raw_tp_regs and passed on together with @buf, @size and @flags.
+ */
+BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
+	   void *, buf, u32, size, u64, flags)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+	perf_fetch_caller_regs(regs);
+	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			     (unsigned long) size, flags, 0);
+}
+
+/* Helper prototype for bpf_get_stack() in raw tracepoint programs.
+ *
+ * The buffer argument is ARG_PTR_TO_UNINIT_MEM, matching the kprobe
+ * (bpf_get_stack_proto) and tracepoint (bpf_get_stack_proto_tp) variants:
+ * bpf_get_stack() fills or zeroes the buffer itself, so the program does
+ * not have to initialize it before the call.
+ */
+static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
+	.func		= bpf_get_stack_raw_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -795,6 +839,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_raw_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_raw_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_raw_tp;
 	default:
 		return tracing_func_proto(func_id, prog);
 	}

--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -59,6 +59,7 @@ source "net/tls/Kconfig"
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 source "net/smc/Kconfig"
+source "net/xdp/Kconfig"

 config INET
 	bool "TCP/IP networking"

--- a/net/Makefile
+++ b/net/Makefile
@@ -85,3 +85,4 @@ obj-y				+= l3mdev/
 endif
 obj-$(CONFIG_QRTR)		+= qrtr/
 obj-$(CONFIG_NET_NCSI)		+= ncsi/
+obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
--- a/net/core/dev.c
+++ b/net/core/dev.c
--- a/net/core/filter.c
+++ b/net/core/filter.c
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -226,7 +226,8 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
  x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
  x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
  x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
-  x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_MAX"
+  x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_XDP"	, \
+  x "AF_MAX"

 static const char *const af_family_key_strings[AF_MAX+1] = {
 	_sock_locks("sk_lock-")
@@ -262,7 +263,8 @@ static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
  "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
  "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
  "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
-  "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_MAX"
+  "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_XDP"      ,
+  "rlock-AF_MAX"
 };
 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
  "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
@@ -279,7 +281,8 @@ static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
  "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
  "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
  "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
-  "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_MAX"
+  "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_XDP"      ,
+  "wlock-AF_MAX"
 };
 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
  "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
@@ -296,7 +299,8 @@ static const char *const af_family_elock_key_strings[AF_MAX+1] = {
  "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
  "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
  "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
-  "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_MAX"
+  "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_XDP"      ,
+  "elock-AF_MAX"
 };

 /*

--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -308,11 +308,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);

-void xdp_return_frame(struct xdp_frame *xdpf)
+static void xdp_return(void *data, struct xdp_mem_info *mem)
 {
-	struct xdp_mem_info *mem = &xdpf->mem;
 	struct xdp_mem_allocator *xa;
-	void *data = xdpf->data;
 	struct page *page;

 	switch (mem->type) {
@@ -339,4 +337,15 @@ void xdp_return_frame(struct xdp_frame *xdpf)
 		break;
 	}
 }
+
+void xdp_return_frame(struct xdp_frame *xdpf)
+{
+	xdp_return(xdpf->data, &xdpf->mem);
+}
 EXPORT_SYMBOL_GPL(xdp_return_frame);
+
+void xdp_return_buff(struct xdp_buff *xdp)
+{
+	xdp_return(xdp->data, &xdp->rxq->mem);
+}
+EXPORT_SYMBOL_GPL(xdp_return_buff);
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
--- a/net/xdp/Kconfig
+++ b/net/xdp/Kconfig
+config XDP_SOCKETS
+	bool "XDP sockets"
+	depends on BPF_SYSCALL
+	default n
+	help
+	  XDP sockets allows a channel between XDP programs and
+	  userspace applications.
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
+obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
--- a/net/xdp/xdp_umem_props.h
+++ b/net/xdp/xdp_umem_props.h
+/* SPDX-License-Identifier: GPL-2.0
+ * XDP user-space packet buffer
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef XDP_UMEM_PROPS_H_
+#define XDP_UMEM_PROPS_H_
+
+struct xdp_umem_props {
+	u32 frame_size;
+	u32 nframes;
+};
+
+#endif /* XDP_UMEM_PROPS_H_ */
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
--- a/samples/bpf/xdpsock.h
+++ b/samples/bpf/xdpsock.h
--- a/samples/bpf/xdpsock_kern.c
+++ b/samples/bpf/xdpsock_kern.c
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
--- a/tools/include/uapi/linux/erspan.h
+++ b/tools/include/uapi/linux/erspan.h
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
--- a/tools/testing/selftests/bpf/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
--- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
--- a/tools/testing/selftests/bpf/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h