Commit 44549623, authored by Quentin Monnet, committed by Daniel Borkmann

nfp: bpf: optimise save/restore for R6~R9 based on register usage

When pre-processing the instructions, it is trivial to detect what
subprograms are using R6, R7, R8 or R9 as destination registers. If a
subprogram uses none of those, then we do not need to jump to the
subroutines dedicated to saving and restoring callee-saved registers in
its prologue and epilogue.

This patch introduces detection of callee-saved registers in subprograms
and prevents the JIT from adding calls to those subroutines whenever we
can: we save some instructions in the translated program, and some time
at runtime on BPF-to-BPF calls and returns.

If no subprogram needs to save those registers, we can avoid appending
the subroutines at the end of the program.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 2178f3f0
...@@ -3132,7 +3132,9 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -3132,7 +3132,9 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
NFP_CSR_ACT_LM_ADDR0); NFP_CSR_ACT_LM_ADDR0);
} }
/* The following steps are performed: /* Two cases for jumping to the callee:
*
* - If callee uses and needs to save R6~R9 then:
* 1. Put the start offset of the callee into imm_b(). This will * 1. Put the start offset of the callee into imm_b(). This will
* require a fixup step, as we do not necessarily know this * require a fixup step, as we do not necessarily know this
* address yet. * address yet.
...@@ -3140,8 +3142,12 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -3140,8 +3142,12 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
* register ret_reg(). * register ret_reg().
* 3. (After defer slots are consumed) Jump to the subroutine that * 3. (After defer slots are consumed) Jump to the subroutine that
* pushes the registers to the stack. * pushes the registers to the stack.
* The subroutine acts as a trampoline, and returns to the address in * The subroutine acts as a trampoline, and returns to the address in
* imm_b(), i.e. jumps to the callee. * imm_b(), i.e. jumps to the callee.
*
* - If callee does not need to save R6~R9 then just load return
* address to the caller in ret_reg(), and jump to the callee
* directly.
* *
* Using ret_reg() to pass the return address to the callee is set here * Using ret_reg() to pass the return address to the callee is set here
* as a convention. The callee can then push this address onto its * as a convention. The callee can then push this address onto its
...@@ -3157,11 +3163,21 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -3157,11 +3163,21 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
* execution of the callee, we will not have to push the return * execution of the callee, we will not have to push the return
* address to the stack for leaf functions. * address to the stack for leaf functions.
*/ */
ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; if (!meta->jmp_dst) {
emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
RELO_BR_GO_CALL_PUSH_REGS); return -ELOOP;
offset_br = nfp_prog_current_offset(nfp_prog); }
wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
RELO_BR_GO_CALL_PUSH_REGS);
offset_br = nfp_prog_current_offset(nfp_prog);
wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
} else {
ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
emit_br(nfp_prog, BR_UNC, meta->n + 1 + meta->insn.imm, 1);
offset_br = nfp_prog_current_offset(nfp_prog);
}
wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);
if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
...@@ -3227,15 +3243,24 @@ static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -3227,15 +3243,24 @@ static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int static int
nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{ {
/* Pop R6~R9 to the stack via related subroutine. if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
* Pop return address for BPF-to-BPF call from the stack and load it /* Pop R6~R9 to the stack via related subroutine.
* into ret_reg() before we jump. This means that the subroutine does * We loaded the return address to the caller into ret_reg().
* not come back here, we make it jump back to the subprogram caller * This means that the subroutine does not come back here, we
* directly! * make it jump back to the subprogram caller directly!
*/ */
emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
RELO_BR_GO_CALL_POP_REGS); RELO_BR_GO_CALL_POP_REGS);
wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); /* Pop return address from the stack. */
wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
} else {
/* Pop return address from the stack. */
wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
/* Jump back to caller if no callee-saved registers were used
* by the subprogram.
*/
emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
}
return 0; return 0;
} }
...@@ -3410,7 +3435,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) ...@@ -3410,7 +3435,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
return -ELOOP; return -ELOOP;
} }
if (is_mbpf_pseudo_call(meta)) { if (is_mbpf_pseudo_call(meta) &&
nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
err = nfp_fixup_immed_relo(nfp_prog, meta, err = nfp_fixup_immed_relo(nfp_prog, meta,
jmp_dst, br_idx); jmp_dst, br_idx);
if (err) if (err)
...@@ -3549,6 +3575,17 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) ...@@ -3549,6 +3575,17 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
} }
static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
{
unsigned int idx;
for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
if (nfp_prog->subprog[idx].needs_reg_push)
return true;
return false;
}
static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
{ {
u8 reg; u8 reg;
...@@ -3612,7 +3649,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) ...@@ -3612,7 +3649,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog)
WARN_ON(1); WARN_ON(1);
} }
if (nfp_prog->subprog_cnt == 1) if (!nfp_prog_needs_callee_reg_save(nfp_prog))
return; return;
nfp_push_callee_registers(nfp_prog); nfp_push_callee_registers(nfp_prog);
...@@ -4354,10 +4391,20 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) ...@@ -4354,10 +4391,20 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
nfp_prog->tgt_abort + bv->start_off); nfp_prog->tgt_abort + bv->start_off);
break; break;
case RELO_BR_GO_CALL_PUSH_REGS: case RELO_BR_GO_CALL_PUSH_REGS:
if (!nfp_prog->tgt_call_push_regs) {
pr_err("BUG: failed to detect subprogram registers needs\n");
err = -EINVAL;
goto err_free_prog;
}
off = nfp_prog->tgt_call_push_regs + bv->start_off; off = nfp_prog->tgt_call_push_regs + bv->start_off;
br_set_offset(&prog[i], off); br_set_offset(&prog[i], off);
break; break;
case RELO_BR_GO_CALL_POP_REGS: case RELO_BR_GO_CALL_POP_REGS:
if (!nfp_prog->tgt_call_pop_regs) {
pr_err("BUG: failed to detect subprogram registers needs\n");
err = -EINVAL;
goto err_free_prog;
}
off = nfp_prog->tgt_call_pop_regs + bv->start_off; off = nfp_prog->tgt_call_pop_regs + bv->start_off;
br_set_offset(&prog[i], off); br_set_offset(&prog[i], off);
break; break;
......
...@@ -452,9 +452,11 @@ static inline bool is_mbpf_pseudo_call(const struct nfp_insn_meta *meta) ...@@ -452,9 +452,11 @@ static inline bool is_mbpf_pseudo_call(const struct nfp_insn_meta *meta)
/** /**
* struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info
* @stack_depth: maximum stack depth used by this sub-program * @stack_depth: maximum stack depth used by this sub-program
* @needs_reg_push: whether sub-program uses callee-saved registers
*/ */
struct nfp_bpf_subprog_info { struct nfp_bpf_subprog_info {
u16 stack_depth; u16 stack_depth;
u8 needs_reg_push : 1;
}; };
/** /**
......
...@@ -644,7 +644,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) ...@@ -644,7 +644,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
} }
static int static int
nfp_assign_subprog_idx(struct bpf_verifier_env *env, struct nfp_prog *nfp_prog) nfp_assign_subprog_idx_and_regs(struct bpf_verifier_env *env,
struct nfp_prog *nfp_prog)
{ {
struct nfp_insn_meta *meta; struct nfp_insn_meta *meta;
int index = 0; int index = 0;
...@@ -653,6 +654,10 @@ nfp_assign_subprog_idx(struct bpf_verifier_env *env, struct nfp_prog *nfp_prog) ...@@ -653,6 +654,10 @@ nfp_assign_subprog_idx(struct bpf_verifier_env *env, struct nfp_prog *nfp_prog)
if (nfp_is_subprog_start(meta)) if (nfp_is_subprog_start(meta))
index++; index++;
meta->subprog_idx = index; meta->subprog_idx = index;
if (meta->insn.dst_reg >= BPF_REG_6 &&
meta->insn.dst_reg <= BPF_REG_9)
nfp_prog->subprog[index].needs_reg_push = 1;
} }
if (index + 1 != nfp_prog->subprog_cnt) { if (index + 1 != nfp_prog->subprog_cnt) {
...@@ -734,7 +739,7 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env) ...@@ -734,7 +739,7 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env)
if (!nfp_prog->subprog) if (!nfp_prog->subprog)
return -ENOMEM; return -ENOMEM;
nfp_assign_subprog_idx(env, nfp_prog); nfp_assign_subprog_idx_and_regs(env, nfp_prog);
info = env->subprog_info; info = env->subprog_info;
for (i = 0; i < nfp_prog->subprog_cnt; i++) { for (i = 0; i < nfp_prog->subprog_cnt; i++) {
...@@ -745,8 +750,9 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env) ...@@ -745,8 +750,9 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env)
/* Account for size of return address. */ /* Account for size of return address. */
nfp_prog->subprog[i].stack_depth += REG_WIDTH; nfp_prog->subprog[i].stack_depth += REG_WIDTH;
/* Account for size of saved registers. */ /* Account for size of saved registers, if necessary. */
nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4; if (nfp_prog->subprog[i].needs_reg_push)
nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4;
} }
nn = netdev_priv(env->prog->aux->offload->netdev); nn = netdev_priv(env->prog->aux->offload->netdev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment