Commit 7bdbf744 authored by Andrii Nakryiko, committed by Alexei Starovoitov

bpf: add special internal-only MOV instruction to resolve per-CPU addrs

Add a new BPF instruction for resolving absolute addresses of per-CPU
data from their per-CPU offsets. This instruction is internal-only and
users are not allowed to use it directly. It will only be used for
internal inlining optimizations for now between BPF verifier and BPF JITs.

We use a special BPF_MOV | BPF_ALU64 | BPF_X form with insn->off field
set to BPF_ADDR_PERCPU = -1. A negative offset value is used to distinguish
it from the positive ones used by user-exposed instructions.

Such an instruction resolves a per-CPU offset stored in a register to
a valid kernel address which can be dereferenced. It is useful in any
use case where the absolute address of per-CPU data has to be resolved
(e.g., in inlining bpf_map_lookup_elem()).

BPF disassembler is also taught to recognize them to support dumping
final BPF assembly code (non-JIT'ed version).

Add an arch-specific way for BPF JITs to mark support for this instruction.

This patch also adds support for these instructions in x86-64 BPF JIT.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20240402021307.1012571-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 2e114248
...@@ -1382,6 +1382,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image ...@@ -1382,6 +1382,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
maybe_emit_mod(&prog, AUX_REG, dst_reg, true); maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg)); EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
break; break;
} else if (insn_is_mov_percpu_addr(insn)) {
u32 off = (u32)(unsigned long)&this_cpu_off;
/* mov <dst>, <src> (if necessary) */
EMIT_mov(dst_reg, src_reg);
/* add <dst>, gs:[<off>] */
EMIT2(0x65, add_1mod(0x48, dst_reg));
EMIT3(0x03, add_1reg(0x04, dst_reg), 0x25);
EMIT(off, 4);
break;
} }
fallthrough; fallthrough;
case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU | BPF_MOV | BPF_X:
...@@ -3365,6 +3376,11 @@ bool bpf_jit_supports_subprog_tailcalls(void) ...@@ -3365,6 +3376,11 @@ bool bpf_jit_supports_subprog_tailcalls(void)
return true; return true;
} }
/* x86-64 JIT: report that the internal-only per-CPU address MOV
 * instruction is supported by this JIT.
 *
 * Return: always true.
 */
bool bpf_jit_supports_percpu_insn(void)
{
return true;
}
void bpf_jit_free(struct bpf_prog *prog) void bpf_jit_free(struct bpf_prog *prog)
{ {
if (prog->jited) { if (prog->jited) {
......
...@@ -178,6 +178,25 @@ struct ctl_table_header; ...@@ -178,6 +178,25 @@ struct ctl_table_header;
.off = 0, \ .off = 0, \
.imm = 0 }) .imm = 0 })
/* Special (internal-only) form of mov, used to resolve per-CPU addrs:
 * dst_reg = src_reg + <percpu_base_off>
 * BPF_ADDR_PERCPU is used as a special insn->off value.
 * The value is negative so that it cannot be confused with the positive
 * insn->off values used by user-exposed instructions.
 */
#define BPF_ADDR_PERCPU (-1)
/* Build the internal-only per-CPU address-resolving mov instruction:
 * a BPF_ALU64 | BPF_MOV | BPF_X insn whose off field is BPF_ADDR_PERCPU.
 * DST is the destination register, SRC the source register.
 */
#define BPF_MOV64_PERCPU_REG(DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_MOV | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = BPF_ADDR_PERCPU, \
.imm = 0 })
/* Tell whether @insn is the internal-only per-CPU address-resolving mov,
 * i.e. a 64-bit register-to-register mov whose off field carries the
 * BPF_ADDR_PERCPU marker.
 *
 * Return: true if @insn has that form, false otherwise.
 */
static inline bool insn_is_mov_percpu_addr(const struct bpf_insn *insn)
{
	/* Any opcode other than the 64-bit reg-to-reg mov is not a match. */
	if (insn->code != (BPF_ALU64 | BPF_MOV | BPF_X))
		return false;

	/* The special off value is what selects the per-CPU form. */
	return insn->off == BPF_ADDR_PERCPU;
}
/* Short form of mov, dst_reg = imm32 */ /* Short form of mov, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM) \ #define BPF_MOV64_IMM(DST, IMM) \
...@@ -972,6 +991,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); ...@@ -972,6 +991,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void); bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_percpu_insn(void);
bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void); bool bpf_jit_supports_exceptions(void);
......
...@@ -2945,6 +2945,11 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void) ...@@ -2945,6 +2945,11 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void)
return false; return false;
} }
/* Weak default: report that the internal-only per-CPU address MOV
 * instruction is not supported. Declared __weak so that a JIT providing
 * its own definition takes precedence.
 *
 * Return: always false.
 */
bool __weak bpf_jit_supports_percpu_insn(void)
{
return false;
}
bool __weak bpf_jit_supports_kfunc_call(void) bool __weak bpf_jit_supports_kfunc_call(void)
{ {
return false; return false;
......
...@@ -172,6 +172,17 @@ static bool is_addr_space_cast(const struct bpf_insn *insn) ...@@ -172,6 +172,17 @@ static bool is_addr_space_cast(const struct bpf_insn *insn)
insn->off == BPF_ADDR_SPACE_CAST; insn->off == BPF_ADDR_SPACE_CAST;
} }
/* Special (internal-only) form of mov, used to resolve per-CPU addrs:
 * dst_reg = src_reg + <percpu_base_off>
 * BPF_ADDR_PERCPU is used as a special insn->off value; a 64-bit
 * register-to-register mov carrying this off value is printed by the
 * disassembler as a per-CPU address resolution.
 */
#define BPF_ADDR_PERCPU (-1)
/* Tell whether @insn is the internal-only per-CPU address-resolving mov:
 * opcode BPF_ALU64 | BPF_MOV | BPF_X with off set to BPF_ADDR_PERCPU.
 *
 * Return: true if @insn has that form, false otherwise.
 */
static inline bool is_mov_percpu_addr(const struct bpf_insn *insn)
{
	/* Reject everything that is not a 64-bit reg-to-reg mov first. */
	if (insn->code != (BPF_ALU64 | BPF_MOV | BPF_X))
		return false;

	/* Only the special off marker distinguishes the per-CPU form. */
	return insn->off == BPF_ADDR_PERCPU;
}
void print_bpf_insn(const struct bpf_insn_cbs *cbs, void print_bpf_insn(const struct bpf_insn_cbs *cbs,
const struct bpf_insn *insn, const struct bpf_insn *insn,
bool allow_ptr_leaks) bool allow_ptr_leaks)
...@@ -194,6 +205,9 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, ...@@ -194,6 +205,9 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n", verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n",
insn->code, insn->dst_reg, insn->code, insn->dst_reg,
insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm); insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm);
} else if (is_mov_percpu_addr(insn)) {
verbose(cbs->private_data, "(%02x) r%d = &(void __percpu *)(r%d)\n",
insn->code, insn->dst_reg, insn->src_reg);
} else if (BPF_SRC(insn->code) == BPF_X) { } else if (BPF_SRC(insn->code) == BPF_X) {
verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n", verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n",
insn->code, class == BPF_ALU ? 'w' : 'r', insn->code, class == BPF_ALU ? 'w' : 'r',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment