Commit 67e4ca74 authored by Alexei Starovoitov

Merge branch 'bpf, x64: optimize JIT's pro/epilogue'

Maciej Fijalkowski says:

====================
Hi!

This small set can be considered as a followup after recent addition of
support for tailcalls in bpf subprograms and is focused on optimizing
x64 JIT prologue and epilogue sections.

Turns out that popping the tail call counter is not needed anymore, and %rsp
handling can be skipped when the stack depth is 0.

For longer explanations, please see commit messages.

Thank you,
Maciej
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 6458bde3 4d0b8c0b
@@ -281,6 +281,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 	EMIT1(0x55);             /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
 	/* sub rsp, rounded_stack_depth */
-	EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
+	if (stack_depth)
+		EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
 	if (tail_call_reachable)
 		EMIT1(0x50);         /* push rax */
@@ -407,9 +408,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	int tcc_off = -4 - round_up(stack_depth, 8);
 	u8 *prog = *pprog;
 	int pop_bytes = 0;
-	int off1 = 49;
-	int off2 = 38;
-	int off3 = 16;
+	int off1 = 42;
+	int off2 = 31;
+	int off3 = 9;
 	int cnt = 0;

 	/* count the additional bytes used for popping callee regs from stack
@@ -421,6 +422,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	off2 += pop_bytes;
 	off3 += pop_bytes;

+	if (stack_depth) {
+		off1 += 7;
+		off2 += 7;
+		off3 += 7;
+	}
+
 	/*
 	 * rdi - pointer to ctx
 	 * rsi - pointer to bpf_array
@@ -465,6 +472,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	prog = *pprog;

 	EMIT1(0x58);                              /* pop rax */
-	EMIT3_off32(0x48, 0x81, 0xC4,             /* add rsp, sd */
-		    round_up(stack_depth, 8));
+	if (stack_depth)
+		EMIT3_off32(0x48, 0x81, 0xC4,     /* add rsp, sd */
+			    round_up(stack_depth, 8));
@@ -491,7 +499,7 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
 	int tcc_off = -4 - round_up(stack_depth, 8);
 	u8 *prog = *pprog;
 	int pop_bytes = 0;
-	int off1 = 27;
+	int off1 = 20;
 	int poke_off;
 	int cnt = 0;
@@ -506,10 +514,14 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
 	 * total bytes for:
 	 * - nop5/ jmpq $off
 	 * - pop callee regs
-	 * - sub rsp, $val
+	 * - sub rsp, $val if depth > 0
 	 * - pop rax
 	 */
-	poke_off = X86_PATCH_SIZE + pop_bytes + 7 + 1;
+	poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+	if (stack_depth) {
+		poke_off += 7;
+		off1 += 7;
+	}

 	/*
 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -533,6 +545,7 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
 	pop_callee_regs(pprog, callee_regs_used);
 	prog = *pprog;
 	EMIT1(0x58);                                  /* pop rax */
-	EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
+	if (stack_depth)
+		EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));

 	memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
@@ -1441,8 +1454,6 @@ xadd:			if (is_imm8(insn->off))
 			/* Update cleanup_addr */
 			ctx->cleanup_addr = proglen;
 			pop_callee_regs(&prog, callee_regs_used);
-			if (tail_call_reachable)
-				EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */
 			EMIT1(0xC9); /* leave */
 			EMIT1(0xC3); /* ret */
 			break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment