Commit ee3e2469 authored by Peter Zijlstra

x86/ftrace: Make it call depth tracking aware

Since ftrace has trampolines, don't use thunks for the __fentry__ site
but instead require that every function called from there includes
accounting. This very much includes all the direct-call functions.

Additionally, ftrace uses ROP tricks in two places:

 - return_to_handler(), and
 - ftrace_regs_caller() when pt_regs->orig_ax is set by a direct-call.

return_to_handler() already uses a retpoline to replace an
indirect-jump to defeat IBT, since this is a jump-type retpoline, make
sure there is no accounting done and ALTERNATIVE the RET into a ret.

ftrace_regs_caller() does much the same and gets the same treatment.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.927545073@infradead.org
parent 36b64f10
...@@ -343,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void) ...@@ -343,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void)
{ {
x86_return_thunk = &__x86_return_skl; x86_return_thunk = &__x86_return_skl;
} }
#define CALL_DEPTH_ACCOUNT \
ALTERNATIVE("", \
__stringify(INCREMENT_CALL_DEPTH), \
X86_FEATURE_CALL_DEPTH)
#ifdef CONFIG_CALL_THUNKS_DEBUG #ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count); DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count); DECLARE_PER_CPU(u64, __x86_ret_count);
...@@ -351,6 +357,9 @@ DECLARE_PER_CPU(u64, __x86_ctxsw_count); ...@@ -351,6 +357,9 @@ DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif #endif
#else #else
static inline void x86_set_skl_return_thunk(void) {} static inline void x86_set_skl_return_thunk(void) {}
#define CALL_DEPTH_ACCOUNT ""
#endif #endif
#ifdef CONFIG_RETPOLINE #ifdef CONFIG_RETPOLINE
......
...@@ -316,7 +316,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) ...@@ -316,7 +316,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
return 0; return 0;
/* Is function call target a thunk? */ /* Is function call target a thunk? */
if (is_callthunk(func)) if (func && is_callthunk(func))
return 0; return 0;
memcpy(*pprog, tmpl, tmpl_size); memcpy(*pprog, tmpl, tmpl_size);
......
...@@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void) ...@@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void)
static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{ {
/*
* No need to translate into a callthunk. The trampoline does
* the depth accounting itself.
*/
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
} }
...@@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ...@@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned long size; unsigned long size;
unsigned long *ptr; unsigned long *ptr;
void *trampoline; void *trampoline;
void *ip; void *ip, *dest;
/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */ /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
...@@ -404,10 +408,14 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ...@@ -404,10 +408,14 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/* put in the call to the function */ /* put in the call to the function */
mutex_lock(&text_mutex); mutex_lock(&text_mutex);
call_offset -= start_offset; call_offset -= start_offset;
/*
* No need to translate into a callthunk. The trampoline does
* the depth accounting before the call already.
*/
dest = ftrace_ops_get_func(ops);
memcpy(trampoline + call_offset, memcpy(trampoline + call_offset,
text_gen_insn(CALL_INSN_OPCODE, text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
trampoline + call_offset, CALL_INSN_SIZE);
ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
/* ALLOC_TRAMP flags lets us know we created it */ /* ALLOC_TRAMP flags lets us know we created it */
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/ftrace.h> #include <asm/ftrace.h>
#include <asm/export.h> #include <asm/export.h>
...@@ -132,6 +133,7 @@ ...@@ -132,6 +133,7 @@
#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE
SYM_FUNC_START(__fentry__) SYM_FUNC_START(__fentry__)
CALL_DEPTH_ACCOUNT
RET RET
SYM_FUNC_END(__fentry__) SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__) EXPORT_SYMBOL(__fentry__)
...@@ -140,6 +142,8 @@ SYM_FUNC_START(ftrace_caller) ...@@ -140,6 +142,8 @@ SYM_FUNC_START(ftrace_caller)
/* save_mcount_regs fills in first two parameters */ /* save_mcount_regs fills in first two parameters */
save_mcount_regs save_mcount_regs
CALL_DEPTH_ACCOUNT
/* Stack - skipping return address of ftrace_caller */ /* Stack - skipping return address of ftrace_caller */
leaq MCOUNT_REG_SIZE+8(%rsp), %rcx leaq MCOUNT_REG_SIZE+8(%rsp), %rcx
movq %rcx, RSP(%rsp) movq %rcx, RSP(%rsp)
...@@ -155,6 +159,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) ...@@ -155,6 +159,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
/* Only ops with REGS flag set should have CS register set */ /* Only ops with REGS flag set should have CS register set */
movq $0, CS(%rsp) movq $0, CS(%rsp)
/* Account for the function call below */
CALL_DEPTH_ACCOUNT
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
ANNOTATE_NOENDBR ANNOTATE_NOENDBR
call ftrace_stub call ftrace_stub
...@@ -189,6 +196,8 @@ SYM_FUNC_START(ftrace_regs_caller) ...@@ -189,6 +196,8 @@ SYM_FUNC_START(ftrace_regs_caller)
save_mcount_regs 8 save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */ /* save_mcount_regs fills in first two parameters */
CALL_DEPTH_ACCOUNT
SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
ANNOTATE_NOENDBR ANNOTATE_NOENDBR
/* Load the ftrace_ops into the 3rd parameter */ /* Load the ftrace_ops into the 3rd parameter */
...@@ -219,6 +228,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) ...@@ -219,6 +228,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
/* regs go into 4th parameter */ /* regs go into 4th parameter */
leaq (%rsp), %rcx leaq (%rsp), %rcx
/* Account for the function call below */
CALL_DEPTH_ACCOUNT
SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
ANNOTATE_NOENDBR ANNOTATE_NOENDBR
call ftrace_stub call ftrace_stub
...@@ -282,7 +294,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) ...@@ -282,7 +294,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
int3 int3
.Ldo_rebalance: .Ldo_rebalance:
add $8, %rsp add $8, %rsp
RET ALTERNATIVE __stringify(RET), \
__stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
X86_FEATURE_CALL_DEPTH
SYM_FUNC_END(ftrace_regs_caller) SYM_FUNC_END(ftrace_regs_caller)
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
...@@ -291,6 +305,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) ...@@ -291,6 +305,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */ #else /* ! CONFIG_DYNAMIC_FTRACE */
SYM_FUNC_START(__fentry__) SYM_FUNC_START(__fentry__)
CALL_DEPTH_ACCOUNT
cmpq $ftrace_stub, ftrace_trace_function cmpq $ftrace_stub, ftrace_trace_function
jnz trace jnz trace
...@@ -347,6 +363,8 @@ SYM_CODE_START(return_to_handler) ...@@ -347,6 +363,8 @@ SYM_CODE_START(return_to_handler)
int3 int3
.Ldo_rop: .Ldo_rop:
mov %rdi, (%rsp) mov %rdi, (%rsp)
RET ALTERNATIVE __stringify(RET), \
__stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
X86_FEATURE_CALL_DEPTH
SYM_CODE_END(return_to_handler) SYM_CODE_END(return_to_handler)
#endif #endif
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/sort.h> #include <linux/sort.h>
#include <asm/extable.h> #include <asm/extable.h>
#include <asm/ftrace.h>
#include <asm/set_memory.h> #include <asm/set_memory.h>
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
#include <asm/text-patching.h> #include <asm/text-patching.h>
...@@ -2135,6 +2136,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ...@@ -2135,6 +2136,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
prog = image; prog = image;
EMIT_ENDBR(); EMIT_ENDBR();
/*
* This is the direct-call trampoline, as such it needs accounting
* for the __fentry__ call.
*/
x86_call_depth_emit_accounting(&prog, NULL);
EMIT1(0x55); /* push rbp */ EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
......
...@@ -785,7 +785,14 @@ static struct fgraph_ops fgraph_ops __initdata = { ...@@ -785,7 +785,14 @@ static struct fgraph_ops fgraph_ops __initdata = {
}; };
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
noinline __noclone static void trace_direct_tramp(void) { } #ifndef CALL_DEPTH_ACCOUNT
#define CALL_DEPTH_ACCOUNT ""
#endif
noinline __noclone static void trace_direct_tramp(void)
{
asm(CALL_DEPTH_ACCOUNT);
}
#endif #endif
/* /*
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func1(void); extern void my_direct_func1(void);
extern void my_direct_func2(void); extern void my_direct_func2(void);
...@@ -34,6 +35,7 @@ asm ( ...@@ -34,6 +35,7 @@ asm (
ASM_ENDBR ASM_ENDBR
" pushq %rbp\n" " pushq %rbp\n"
" movq %rsp, %rbp\n" " movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" call my_direct_func1\n" " call my_direct_func1\n"
" leave\n" " leave\n"
" .size my_tramp1, .-my_tramp1\n" " .size my_tramp1, .-my_tramp1\n"
...@@ -45,6 +47,7 @@ asm ( ...@@ -45,6 +47,7 @@ asm (
ASM_ENDBR ASM_ENDBR
" pushq %rbp\n" " pushq %rbp\n"
" movq %rsp, %rbp\n" " movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" call my_direct_func2\n" " call my_direct_func2\n"
" leave\n" " leave\n"
ASM_RET ASM_RET
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func1(unsigned long ip); extern void my_direct_func1(unsigned long ip);
extern void my_direct_func2(unsigned long ip); extern void my_direct_func2(unsigned long ip);
...@@ -32,6 +33,7 @@ asm ( ...@@ -32,6 +33,7 @@ asm (
ASM_ENDBR ASM_ENDBR
" pushq %rbp\n" " pushq %rbp\n"
" movq %rsp, %rbp\n" " movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n" " pushq %rdi\n"
" movq 8(%rbp), %rdi\n" " movq 8(%rbp), %rdi\n"
" call my_direct_func1\n" " call my_direct_func1\n"
...@@ -46,6 +48,7 @@ asm ( ...@@ -46,6 +48,7 @@ asm (
ASM_ENDBR ASM_ENDBR
" pushq %rbp\n" " pushq %rbp\n"
" movq %rsp, %rbp\n" " movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n" " pushq %rdi\n"
" movq 8(%rbp), %rdi\n" " movq 8(%rbp), %rdi\n"
" call my_direct_func2\n" " call my_direct_func2\n"
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <linux/sched/stat.h> #include <linux/sched/stat.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func(unsigned long ip); extern void my_direct_func(unsigned long ip);
...@@ -27,6 +28,7 @@ asm ( ...@@ -27,6 +28,7 @@ asm (
ASM_ENDBR ASM_ENDBR
" pushq %rbp\n" " pushq %rbp\n"
" movq %rsp, %rbp\n" " movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n" " pushq %rdi\n"
" movq 8(%rbp), %rdi\n" " movq 8(%rbp), %rdi\n"
" call my_direct_func\n" " call my_direct_func\n"
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/mm.h> /* for handle_mm_fault() */ #include <linux/mm.h> /* for handle_mm_fault() */
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func(struct vm_area_struct *vma, extern void my_direct_func(struct vm_area_struct *vma,
unsigned long address, unsigned int flags); unsigned long address, unsigned int flags);
...@@ -29,6 +30,7 @@ asm ( ...@@ -29,6 +30,7 @@ asm (
ASM_ENDBR ASM_ENDBR
" pushq %rbp\n" " pushq %rbp\n"
" movq %rsp, %rbp\n" " movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n" " pushq %rdi\n"
" pushq %rsi\n" " pushq %rsi\n"
" pushq %rdx\n" " pushq %rdx\n"
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/sched.h> /* for wake_up_process() */ #include <linux/sched.h> /* for wake_up_process() */
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
extern void my_direct_func(struct task_struct *p); extern void my_direct_func(struct task_struct *p);
...@@ -26,6 +27,7 @@ asm ( ...@@ -26,6 +27,7 @@ asm (
ASM_ENDBR ASM_ENDBR
" pushq %rbp\n" " pushq %rbp\n"
" movq %rsp, %rbp\n" " movq %rsp, %rbp\n"
CALL_DEPTH_ACCOUNT
" pushq %rdi\n" " pushq %rdi\n"
" call my_direct_func\n" " call my_direct_func\n"
" popq %rdi\n" " popq %rdi\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment