Commit 073ca6a0 authored by Luke Nelson's avatar Luke Nelson Committed by Daniel Borkmann

bpf, riscv: Optimize BPF_JSET BPF_K using andi on RV64

This patch optimizes BPF_JSET BPF_K by using a RISC-V andi instruction
when the BPF immediate fits in 12 bits, instead of first loading the
immediate to a temporary register.

Examples of generated code with and without this optimization:

BPF_JMP_IMM(BPF_JSET, R1, 2, 1) without optimization:

  20: li    t1,2
  24: and   t1,a0,t1
  28: bnez  t1,0x30

BPF_JMP_IMM(BPF_JSET, R1, 2, 1) with optimization:

  20: andi  t1,a0,2
  24: bnez  t1,0x2c

BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) without optimization:

  20: li    t1,2
  24: mv    t2,a0
  28: slli  t2,t2,0x20
  2c: srli  t2,t2,0x20
  30: slli  t1,t1,0x20
  34: srli  t1,t1,0x20
  38: and   t1,t2,t1
  3c: bnez  t1,0x44

BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) with optimization:

  20: andi  t1,a0,2
  24: bnez  t1,0x2c

In these examples, because the upper 32 bits of the sign-extended
immediate are 0, BPF_JMP BPF_JSET and BPF_JMP32 BPF_JSET are equivalent
and therefore the JIT produces identical code for them.
Co-developed-by: default avatarXi Wang <xi.wang@gmail.com>
Signed-off-by: default avatarXi Wang <xi.wang@gmail.com>
Signed-off-by: default avatarLuke Nelson <luke.r.nels@gmail.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Reviewed-by: default avatarBjörn Töpel <bjorn.topel@gmail.com>
Acked-by: default avatarBjörn Töpel <bjorn.topel@gmail.com>
Link: https://lore.kernel.org/bpf/20200506000320.28965-5-luke.r.nels@gmail.com
parent ca349a6a
...@@ -792,8 +792,6 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, ...@@ -792,8 +792,6 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
rvoff = rv_offset(i, off, ctx); rvoff = rv_offset(i, off, ctx);
s = ctx->ninsns; s = ctx->ninsns;
if (imm) { if (imm) {
...@@ -813,15 +811,28 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, ...@@ -813,15 +811,28 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* Adjust for extra insns */ /* Adjust for extra insns */
rvoff -= (e - s) << 2; rvoff -= (e - s) << 2;
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
break;
if (BPF_OP(code) == BPF_JSET) { case BPF_JMP | BPF_JSET | BPF_K:
/* Adjust for and */ case BPF_JMP32 | BPF_JSET | BPF_K:
rvoff -= 4; rvoff = rv_offset(i, off, ctx);
emit(rv_and(rs, rd, rs), ctx); s = ctx->ninsns;
emit_branch(BPF_JNE, rs, RV_REG_ZERO, rvoff, ctx); if (is_12b_int(imm)) {
emit(rv_andi(RV_REG_T1, rd, imm), ctx);
} else { } else {
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); emit_imm(RV_REG_T1, imm, ctx);
emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
} }
/* For jset32, we should clear the upper 32 bits of t1, but
* sign-extension is sufficient here and saves one instruction,
* as t1 is used only in comparison against zero.
*/
if (!is64 && imm < 0)
emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
e = ctx->ninsns;
rvoff -= (e - s) << 2;
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
break; break;
/* function call */ /* function call */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment