Commit 44851665 authored by Daniel Borkmann

Merge branch 'bpf-nfp-jmp-memcpy-improvements'

Jiong Wang says:

====================
Currently, the compiler will lower a memcpy function call in an XDP/eBPF C
program into a sequence of eBPF load/store pairs in some scenarios.

The compiler considers this "inline" optimization beneficial, as it can
avoid a function call and also increase code locality.

However, the Netronome NPU is not a traditional load/store architecture, so
performing a sequence of individual load/store actions is not efficient.

This patch set tries to identify the load/store sequences composed of
load/store pairs that come from memcpy lowering, then accelerates them
through the NPU's Command Push Pull (CPP) instruction.

This patch set registers a new optimization pass that runs before the actual
JIT work. It traverses the eBPF IR and, once it finds a candidate sequence,
records the memory copy source, destination and length information in the
first load instruction starting the sequence and marks all remaining
instructions in the sequence as skippable. Later, when JITing the
first load instruction, optimal instructions will be generated using the
recorded information.

For the safety of this transformation:

  - jump into the middle of the sequence will cancel the optimization.

  - overlapped memory access will cancel the optimization.

  - the load destination register still contains the same value as before
    the transformation.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 554b36bf 6bc7103c
/* /*
* Copyright (C) 2016 Netronome Systems, Inc. * Copyright (C) 2016-2017 Netronome Systems, Inc.
* *
* This software is dual licensed under the GNU General License Version 2, * This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this * June 1991 as shown in the file COPYING in the top-level directory of this
...@@ -89,23 +89,37 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); ...@@ -89,23 +89,37 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
#define nfp_meta_next(meta) list_next_entry(meta, l) #define nfp_meta_next(meta) list_next_entry(meta, l)
#define nfp_meta_prev(meta) list_prev_entry(meta, l) #define nfp_meta_prev(meta) list_prev_entry(meta, l)
#define FLAG_INSN_IS_JUMP_DST BIT(0)
/** /**
* struct nfp_insn_meta - BPF instruction wrapper * struct nfp_insn_meta - BPF instruction wrapper
* @insn: BPF instruction * @insn: BPF instruction
* @ptr: pointer type for memory operations * @ptr: pointer type for memory operations
* @ldst_gather_len: memcpy length gathered from load/store sequence
* @paired_st: the paired store insn at the head of the sequence
* @ptr_not_const: pointer is not always constant * @ptr_not_const: pointer is not always constant
* @jmp_dst: destination info for jump instructions
* @off: index of first generated machine instruction (in nfp_prog.prog) * @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number * @n: eBPF instruction number
* @flags: eBPF instruction extra optimization flags
* @skip: skip this instruction (optimized out) * @skip: skip this instruction (optimized out)
* @double_cb: callback for second part of the instruction * @double_cb: callback for second part of the instruction
* @l: link on nfp_prog->insns list * @l: link on nfp_prog->insns list
*/ */
struct nfp_insn_meta { struct nfp_insn_meta {
struct bpf_insn insn; struct bpf_insn insn;
union {
struct {
struct bpf_reg_state ptr; struct bpf_reg_state ptr;
struct bpf_insn *paired_st;
s16 ldst_gather_len;
bool ptr_not_const; bool ptr_not_const;
};
struct nfp_insn_meta *jmp_dst;
};
unsigned int off; unsigned int off;
unsigned short n; unsigned short n;
unsigned short flags;
bool skip; bool skip;
instr_cb_t double_cb; instr_cb_t double_cb;
...@@ -134,6 +148,16 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) ...@@ -134,6 +148,16 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
return BPF_MODE(meta->insn.code); return BPF_MODE(meta->insn.code);
} }
/* Check whether the instruction wrapped by @meta has an opcode equal to
 * BPF_LDX | BPF_MEM once the bits covered by BPF_SIZE_MASK are ignored.
 */
static inline bool is_mbpf_load(const struct nfp_insn_meta *meta)
{
	/* Clear the BPF_SIZE_MASK bits so they do not affect the comparison. */
	const int opcode_sans_size = meta->insn.code & ~BPF_SIZE_MASK;

	return opcode_sans_size == (BPF_LDX | BPF_MEM);
}
/* Check whether the instruction wrapped by @meta has an opcode equal to
 * BPF_STX | BPF_MEM once the bits covered by BPF_SIZE_MASK are ignored.
 */
static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
{
	/* Clear the BPF_SIZE_MASK bits so they do not affect the comparison. */
	const int opcode_sans_size = meta->insn.code & ~BPF_SIZE_MASK;

	return opcode_sans_size == (BPF_STX | BPF_MEM);
}
/** /**
* struct nfp_prog - nfp BPF program * struct nfp_prog - nfp BPF program
* @prog: machine code * @prog: machine code
...@@ -142,6 +166,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) ...@@ -142,6 +166,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
* @verifier_meta: temporary storage for verifier's insn meta * @verifier_meta: temporary storage for verifier's insn meta
* @type: BPF program type * @type: BPF program type
* @start_off: address of the first instruction in the memory * @start_off: address of the first instruction in the memory
* @last_bpf_off: address of the last instruction translated from BPF
* @tgt_out: jump target for normal exit * @tgt_out: jump target for normal exit
* @tgt_abort: jump target for abort (e.g. access outside of packet buffer) * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
* @tgt_done: jump target to get the next packet * @tgt_done: jump target to get the next packet
...@@ -160,6 +185,7 @@ struct nfp_prog { ...@@ -160,6 +185,7 @@ struct nfp_prog {
enum bpf_prog_type type; enum bpf_prog_type type;
unsigned int start_off; unsigned int start_off;
unsigned int last_bpf_off;
unsigned int tgt_out; unsigned int tgt_out;
unsigned int tgt_abort; unsigned int tgt_abort;
unsigned int tgt_done; unsigned int tgt_done;
...@@ -189,4 +215,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, ...@@ -189,4 +215,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog); struct bpf_prog *prog);
int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog); struct bpf_prog *prog);
struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns);
#endif #endif
/* /*
* Copyright (C) 2016 Netronome Systems, Inc. * Copyright (C) 2016-2017 Netronome Systems, Inc.
* *
* This software is dual licensed under the GNU General License Version 2, * This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this * June 1991 as shown in the file COPYING in the top-level directory of this
...@@ -55,11 +55,10 @@ static int ...@@ -55,11 +55,10 @@ static int
nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
unsigned int cnt) unsigned int cnt)
{ {
struct nfp_insn_meta *meta;
unsigned int i; unsigned int i;
for (i = 0; i < cnt; i++) { for (i = 0; i < cnt; i++) {
struct nfp_insn_meta *meta;
meta = kzalloc(sizeof(*meta), GFP_KERNEL); meta = kzalloc(sizeof(*meta), GFP_KERNEL);
if (!meta) if (!meta)
return -ENOMEM; return -ENOMEM;
...@@ -70,6 +69,24 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, ...@@ -70,6 +69,24 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
list_add_tail(&meta->l, &nfp_prog->insns); list_add_tail(&meta->l, &nfp_prog->insns);
} }
/* Another pass to record jump information. */
list_for_each_entry(meta, &nfp_prog->insns, l) {
u64 code = meta->insn.code;
if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
BPF_OP(code) != BPF_CALL) {
struct nfp_insn_meta *dst_meta;
unsigned short dst_indx;
dst_indx = meta->n + 1 + meta->insn.off;
dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
cnt);
meta->jmp_dst = dst_meta;
dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
}
}
return 0; return 0;
} }
......
/* /*
* Copyright (C) 2016 Netronome Systems, Inc. * Copyright (C) 2016-2017 Netronome Systems, Inc.
* *
* This software is dual licensed under the GNU General License Version 2, * This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this * June 1991 as shown in the file COPYING in the top-level directory of this
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
#include "main.h" #include "main.h"
static struct nfp_insn_meta * struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns) unsigned int insn_idx, unsigned int n_insns)
{ {
...@@ -180,10 +180,10 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) ...@@ -180,10 +180,10 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (meta->insn.code == (BPF_JMP | BPF_EXIT)) if (meta->insn.code == (BPF_JMP | BPF_EXIT))
return nfp_bpf_check_exit(nfp_prog, env); return nfp_bpf_check_exit(nfp_prog, env);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) if (is_mbpf_load(meta))
return nfp_bpf_check_ptr(nfp_prog, meta, env, return nfp_bpf_check_ptr(nfp_prog, meta, env,
meta->insn.src_reg); meta->insn.src_reg);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) if (is_mbpf_store(meta))
return nfp_bpf_check_ptr(nfp_prog, meta, env, return nfp_bpf_check_ptr(nfp_prog, meta, env,
meta->insn.dst_reg); meta->insn.dst_reg);
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
[CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 }, [CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 },
[CMD_TGT_WRITE32_SWAP] = { 0x02, 0x5f },
[CMD_TGT_READ8] = { 0x01, 0x43 }, [CMD_TGT_READ8] = { 0x01, 0x43 },
[CMD_TGT_READ32] = { 0x00, 0x5c }, [CMD_TGT_READ32] = { 0x00, 0x5c },
[CMD_TGT_READ32_LE] = { 0x01, 0x5c }, [CMD_TGT_READ32_LE] = { 0x01, 0x5c },
...@@ -120,7 +121,8 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, ...@@ -120,7 +121,8 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg,
reg->dst = nfp_swreg_to_unreg(dst, true); reg->dst = nfp_swreg_to_unreg(dst, true);
/* Decode source operands */ /* Decode source operands */
if (swreg_type(lreg) == swreg_type(rreg)) if (swreg_type(lreg) == swreg_type(rreg) &&
swreg_type(lreg) != NN_REG_NONE)
return -EFAULT; return -EFAULT;
if (swreg_type(lreg) == NN_REG_GPR_B || if (swreg_type(lreg) == NN_REG_GPR_B ||
...@@ -200,7 +202,8 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, ...@@ -200,7 +202,8 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
/* Decode source operands */ /* Decode source operands */
if (swreg_type(lreg) == swreg_type(rreg)) if (swreg_type(lreg) == swreg_type(rreg) &&
swreg_type(lreg) != NN_REG_NONE)
return -EFAULT; return -EFAULT;
if (swreg_type(lreg) == NN_REG_GPR_B || if (swreg_type(lreg) == NN_REG_GPR_B ||
......
/* /*
* Copyright (C) 2016 Netronome Systems, Inc. * Copyright (C) 2016-2017 Netronome Systems, Inc.
* *
* This software is dual licensed under the GNU General License Version 2, * This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this * June 1991 as shown in the file COPYING in the top-level directory of this
...@@ -209,6 +209,7 @@ enum alu_dst_ab { ...@@ -209,6 +209,7 @@ enum alu_dst_ab {
#define OP_CMD_CNT 0x0000e000000ULL #define OP_CMD_CNT 0x0000e000000ULL
#define OP_CMD_SIG 0x000f0000000ULL #define OP_CMD_SIG 0x000f0000000ULL
#define OP_CMD_TGT_CMD 0x07f00000000ULL #define OP_CMD_TGT_CMD 0x07f00000000ULL
#define OP_CMD_INDIR 0x20000000000ULL
#define OP_CMD_MODE 0x1c0000000000ULL #define OP_CMD_MODE 0x1c0000000000ULL
struct cmd_tgt_act { struct cmd_tgt_act {
...@@ -219,6 +220,7 @@ struct cmd_tgt_act { ...@@ -219,6 +220,7 @@ struct cmd_tgt_act {
enum cmd_tgt_map { enum cmd_tgt_map {
CMD_TGT_READ8, CMD_TGT_READ8,
CMD_TGT_WRITE8_SWAP, CMD_TGT_WRITE8_SWAP,
CMD_TGT_WRITE32_SWAP,
CMD_TGT_READ32, CMD_TGT_READ32,
CMD_TGT_READ32_LE, CMD_TGT_READ32_LE,
CMD_TGT_READ32_SWAP, CMD_TGT_READ32_SWAP,
...@@ -240,6 +242,9 @@ enum cmd_ctx_swap { ...@@ -240,6 +242,9 @@ enum cmd_ctx_swap {
CMD_CTX_NO_SWAP = 3, CMD_CTX_NO_SWAP = 3,
}; };
#define CMD_OVE_LEN BIT(7)
#define CMD_OV_LEN GENMASK(12, 8)
#define OP_LCSR_BASE 0x0fc00000000ULL #define OP_LCSR_BASE 0x0fc00000000ULL
#define OP_LCSR_A_SRC 0x000000003ffULL #define OP_LCSR_A_SRC 0x000000003ffULL
#define OP_LCSR_B_SRC 0x000000ffc00ULL #define OP_LCSR_B_SRC 0x000000ffc00ULL
......
...@@ -548,6 +548,8 @@ struct nfp_net_dp { ...@@ -548,6 +548,8 @@ struct nfp_net_dp {
* @max_r_vecs: Number of allocated interrupt vectors for RX/TX * @max_r_vecs: Number of allocated interrupt vectors for RX/TX
* @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_tx_rings: Maximum number of TX rings supported by the Firmware
* @max_rx_rings: Maximum number of RX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware
* @stride_rx: Queue controller RX queue spacing
* @stride_tx: Queue controller TX queue spacing
* @r_vecs: Pre-allocated array of ring vectors * @r_vecs: Pre-allocated array of ring vectors
* @irq_entries: Pre-allocated array of MSI-X entries * @irq_entries: Pre-allocated array of MSI-X entries
* @lsc_handler: Handler for Link State Change interrupt * @lsc_handler: Handler for Link State Change interrupt
......
...@@ -372,8 +372,7 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest, ...@@ -372,8 +372,7 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest,
* that it can be accessed directly. * that it can be accessed directly.
* *
* NOTE: @address and @size must be 32-bit aligned values. * NOTE: @address and @size must be 32-bit aligned values.
* * The area must also be 'released' when the structure is freed.
* NOTE: The area must also be 'released' when the structure is freed.
* *
* Return: NFP CPP Area handle, or NULL * Return: NFP CPP Area handle, or NULL
*/ */
...@@ -536,8 +535,7 @@ void nfp_cpp_area_release_free(struct nfp_cpp_area *area) ...@@ -536,8 +535,7 @@ void nfp_cpp_area_release_free(struct nfp_cpp_area *area)
* Read data from indicated CPP region. * Read data from indicated CPP region.
* *
* NOTE: @offset and @length must be 32-bit aligned values. * NOTE: @offset and @length must be 32-bit aligned values.
* * Area must have been locked down with an 'acquire'.
* NOTE: Area must have been locked down with an 'acquire'.
* *
* Return: length of io, or -ERRNO * Return: length of io, or -ERRNO
*/ */
...@@ -558,8 +556,7 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area, ...@@ -558,8 +556,7 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area,
* Write data to indicated CPP region. * Write data to indicated CPP region.
* *
* NOTE: @offset and @length must be 32-bit aligned values. * NOTE: @offset and @length must be 32-bit aligned values.
* * Area must have been locked down with an 'acquire'.
* NOTE: Area must have been locked down with an 'acquire'.
* *
* Return: length of io, or -ERRNO * Return: length of io, or -ERRNO
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment