Commit 1f499d6a authored by David S. Miller

Merge branch 'filter-next'

Alexei Starovoitov says:

====================
internal BPF jit for x64 and JITed seccomp

The internal BPF JIT compiler for x86_64 replaces the classic BPF JIT.
It is used by seccomp and by tracing filters (the latter sent as a separate patch).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9509b1c1 8f577cad
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -12,13 +12,16 @@
 /*
  * Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
  * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8  : copy of skb->data
+ * r10 : copy of skb->data
  * r9d : hlen = skb->len - skb->data_len
  */
-#define SKBDATA	%r8
+#define SKBDATA	%r10
 #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+	32 /* space for rbx,r13,r14,r15 */ + \
+	8 /* space for skb_copy_bits */)
 
 sk_load_word:
 	.globl sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
 	movzbl (SKBDATA,%rsi),%eax
 	ret
 
-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
-	.globl sk_load_byte_msh
-	test %esi,%esi
-	js bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
-	.globl sk_load_byte_msh_positive_offset
-	cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
-	jle bpf_slow_path_byte_msh
-	movzbl (SKBDATA,%rsi),%ebx
-	and $15,%bl
-	shl $2,%bl
-	ret
-
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN) \
-	push %rdi; /* save skb */ \
+	mov %rbx, %rdi; /* arg1 == skb */ \
 	push %r9; \
 	push SKBDATA; \
 	/* rsi already has offset */ \
 	mov $LEN,%ecx; /* len */ \
-	lea -12(%rbp),%rdx; \
+	lea - MAX_BPF_STACK + 32(%rbp),%rdx; \
 	call skb_copy_bits; \
 	test %eax,%eax; \
 	pop SKBDATA; \
-	pop %r9; \
-	pop %rdi
+	pop %r9;
 
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	js bpf_error
-	mov -12(%rbp),%eax
+	mov - MAX_BPF_STACK + 32(%rbp),%eax
 	bswap %eax
 	ret
 
 bpf_slow_path_half:
 	bpf_slow_path_common(2)
 	js bpf_error
-	mov -12(%rbp),%ax
+	mov - MAX_BPF_STACK + 32(%rbp),%ax
 	rol $8,%ax
 	movzwl %ax,%eax
 	ret
@@ -122,21 +103,11 @@ bpf_slow_path_half:
 bpf_slow_path_byte:
 	bpf_slow_path_common(1)
 	js bpf_error
-	movzbl -12(%rbp),%eax
-	ret
-
-bpf_slow_path_byte_msh:
-	xchg %eax,%ebx /* dont lose A , X is about to be scratched */
-	bpf_slow_path_common(1)
-	js bpf_error
-	movzbl -12(%rbp),%eax
-	and $15,%al
-	shl $2,%al
-	xchg %eax,%ebx
+	movzbl - MAX_BPF_STACK + 32(%rbp),%eax
 	ret
 
 #define sk_negative_common(SIZE) \
-	push %rdi; /* save skb */ \
+	mov %rbx, %rdi; /* arg1 == skb */ \
 	push %r9; \
 	push SKBDATA; \
 	/* rsi already has offset */ \
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
 	test %rax,%rax; \
 	pop SKBDATA; \
 	pop %r9; \
-	pop %rdi; \
 	jz bpf_error
 
 bpf_slow_path_word_neg:
 	cmp SKF_MAX_NEG_OFF, %esi /* test range */
 	jl bpf_error /* offset lower -> error */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
 	movzbl (%rax), %eax
 	ret
 
-bpf_slow_path_byte_msh_neg:
-	cmp SKF_MAX_NEG_OFF, %esi
-	jl bpf_error
-
-sk_load_byte_msh_negative_offset:
-	.globl sk_load_byte_msh_negative_offset
-	xchg %eax,%ebx /* dont lose A , X is about to be scratched */
-	sk_negative_common(1)
-	movzbl (%rax),%eax
-	and $15,%al
-	shl $2,%al
-	xchg %eax,%ebx
-	ret
-
 bpf_error:
 	# force a return 0 from jit handler
 	xor %eax,%eax
-	mov -8(%rbp),%rbx
+	mov - MAX_BPF_STACK(%rbp),%rbx
+	mov - MAX_BPF_STACK + 8(%rbp),%r13
+	mov - MAX_BPF_STACK + 16(%rbp),%r14
+	mov - MAX_BPF_STACK + 24(%rbp),%r15
 	leaveq
 	ret
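To make the new %rbp-relative offsets easier to follow, the frame layout these helpers assume can be sketched as below. It is inferred only from the MAX_BPF_STACK definition and the loads in this file; the prologue that actually builds the frame is part of the JIT compiler proper, whose diff is collapsed in this view, so treat this as an annotation rather than copied kernel comments.

/* %rbp-relative layout assumed by the helpers above,
 * with MAX_BPF_STACK = 512 + 32 + 8 = 552:
 *
 *   rbp - 512 .. rbp - 1                 : 512-byte BPF program stack
 *                                          (the "512 from filter.h" term)
 *   rbp - MAX_BPF_STACK + 32 (rbp - 520) : scratch filled by skb_copy_bits()
 *   rbp - MAX_BPF_STACK + 24 (rbp - 528) : saved %r15
 *   rbp - MAX_BPF_STACK + 16 (rbp - 536) : saved %r14
 *   rbp - MAX_BPF_STACK + 8  (rbp - 544) : saved %r13
 *   rbp - MAX_BPF_STACK      (rbp - 552) : saved %rbx
 */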
(The diff of one further file in this commit is collapsed and not shown here.)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 
+u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+void bpf_int_jit_compile(struct sk_filter *fp);
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
......
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -54,8 +54,7 @@
 struct seccomp_filter {
 	atomic_t usage;
 	struct seccomp_filter *prev;
-	unsigned short len;  /* Instruction count */
-	struct sock_filter_int insnsi[];
+	struct sk_filter *prog;
 };
 
 /* Limit any path through the tree to 256KB worth of instructions. */
@@ -189,7 +188,8 @@ static u32 seccomp_run_filters(int syscall)
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (f = current->seccomp.filter; f; f = f->prev) {
-		u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
+		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
 	}
@@ -215,7 +215,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 		return -EINVAL;
 
 	for (filter = current->seccomp.filter; filter; filter = filter->prev)
-		total_insns += filter->len + 4;  /* include a 4 instr penalty */
+		total_insns += filter->prog->len + 4;  /* include a 4 instr penalty */
 	if (total_insns > MAX_INSNS_PER_PATH)
 		return -ENOMEM;
@@ -256,19 +256,27 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	/* Allocate a new seccomp_filter */
 	ret = -ENOMEM;
-	filter = kzalloc(sizeof(struct seccomp_filter) +
-			 sizeof(struct sock_filter_int) * new_len,
+	filter = kzalloc(sizeof(struct seccomp_filter),
 			 GFP_KERNEL|__GFP_NOWARN);
 	if (!filter)
 		goto free_prog;
 
-	ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
-	if (ret)
+	filter->prog = kzalloc(sk_filter_size(new_len),
+			       GFP_KERNEL|__GFP_NOWARN);
+	if (!filter->prog)
 		goto free_filter;
 
+	ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
+	if (ret)
+		goto free_filter_prog;
+
 	kfree(fp);
 	atomic_set(&filter->usage, 1);
-	filter->len = new_len;
+	filter->prog->len = new_len;
+	filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp;
+
+	/* JIT internal BPF into native HW instructions */
+	bpf_int_jit_compile(filter->prog);
 
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
@@ -278,6 +286,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	current->seccomp.filter = filter;
 	return 0;
 
+free_filter_prog:
+	kfree(filter->prog);
 free_filter:
 	kfree(filter);
 free_prog:
@@ -330,6 +340,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
+		bpf_jit_free(freeme->prog);
 		kfree(freeme);
 	}
 }
......
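With this change every link in the seccomp filter chain carries a struct sk_filter, so seccomp_run_filters() goes through SK_RUN_FILTER() and transparently hits either the interpreter or the JITed image. The reduced, userspace-buildable sketch below shows only the "most restrictive action wins" walk from the hunk above; the SECCOMP_RET_* values mirror the uapi definitions of this era, while struct filter and the run callback are simplified stand-ins for struct seccomp_filter and SK_RUN_FILTER(), not the kernel types.

#include <stdio.h>

#define SECCOMP_RET_KILL	0x00000000U
#define SECCOMP_RET_ALLOW	0x7fff0000U
#define SECCOMP_RET_ACTION	0x7fff0000U	/* action mask */

struct filter {
	struct filter *prev;
	unsigned int (*run)(const void *sd);	/* stand-in for SK_RUN_FILTER(f->prog, sd) */
};

static unsigned int run_filters(const struct filter *head, const void *sd)
{
	unsigned int ret = SECCOMP_RET_ALLOW;

	/* walk newest -> oldest, keep the numerically lowest (most restrictive) action */
	for (const struct filter *f = head; f; f = f->prev) {
		unsigned int cur_ret = f->run(sd);

		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
			ret = cur_ret;
	}
	return ret;
}

static unsigned int ret_allow(const void *sd) { (void)sd; return SECCOMP_RET_ALLOW; }
static unsigned int ret_kill(const void *sd)  { (void)sd; return SECCOMP_RET_KILL; }

int main(void)
{
	struct filter oldest = { .prev = NULL,    .run = ret_allow };
	struct filter newest = { .prev = &oldest, .run = ret_kill  };

	/* the KILL returned by the newer filter overrides the older ALLOW */
	printf("action = %#x\n", run_filters(&newest, NULL) & SECCOMP_RET_ACTION);
	return 0;
}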
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1524,6 +1524,10 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 		return ERR_PTR(err);
 }
 
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 					     struct sock *sk)
 {
@@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 	/* JIT compiler couldn't process this filter, so do the
 	 * internal BPF translation for the optimized interpreter.
 	 */
-	if (!fp->jited)
+	if (!fp->jited) {
 		fp = __sk_migrate_filter(fp, sk);
 
+		/* Probe if internal BPF can be jit-ed */
+		bpf_int_jit_compile(fp);
+	}
+
 	return fp;
 }
......
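The __weak definition above is what keeps the probe arch-neutral: generic code always calls bpf_int_jit_compile(), the weak no-op is the fallback, and an architecture that implements the internal-BPF JIT links in a strong override that generates native code and marks the program as jited. A minimal, userspace-buildable sketch of that weak-symbol pattern follows; struct sk_filter_sketch is a trimmed stand-in for the kernel's struct sk_filter, not its real layout.

#include <stdio.h>

struct sk_filter_sketch {
	unsigned int jited;			/* 1 once a JIT image is installed */
	unsigned int (*bpf_func)(const void *ctx);
};

/* weak default: no JIT on this architecture, keep using the interpreter */
__attribute__((weak)) void bpf_int_jit_compile(struct sk_filter_sketch *prog)
{
	(void)prog;
	/* a strong override would point prog->bpf_func at generated code
	 * and set prog->jited = 1 here */
}

int main(void)
{
	struct sk_filter_sketch prog = { .jited = 0, .bpf_func = NULL };

	/* Probe if internal BPF can be jit-ed (cf. __sk_prepare_filter above) */
	bpf_int_jit_compile(&prog);

	printf("jited = %u\n", prog.jited);	/* stays 0 without a strong override */
	return 0;
}

Either way the caller does not have to care: SK_RUN_FILTER() simply invokes bpf_func, which the seccomp hunk above first points at the interpreter and which the JIT, when present, replaces with its generated image.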