Commit 4a6d70c9 authored by Steven Rostedt's avatar Steven Rostedt Committed by Steven Rostedt

ftrace/x86: Remove the complex ftrace NMI handling code

As ftrace function tracing would require modifying code that could
be executed in NMI context, which is not stopped with stop_machine(),
ftrace had to do a complex algorithm with various stages of setup
and memory barriers to make it work.

With the new breakpoint method, this is no longer required. The changes
to the code can be done without any problem in NMI context, as well as
without stop machine altogether. Remove the complex code as it is
no longer needed.

Also, a lot of the notrace annotations could be removed from the
NMI code as it is now safe to trace them. With the exception of
do_nmi itself, which does some special work to handle running in
the debug stack. The breakpoint method can cause NMIs to double
nest the debug stack if it's not setup properly, and that is done
in do_nmi(), thus that function must not be traced.

(Note the arch sh may want to do the same)

Cc: Paul Mundt <lethal@linux-sh.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: default avatarSteven Rostedt <rostedt@goodmis.org>
parent 08d636b6
...@@ -40,7 +40,6 @@ config X86 ...@@ -40,7 +40,6 @@ config X86
select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select HAVE_SYSCALL_TRACEPOINTS select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KVM select HAVE_KVM
select HAVE_ARCH_KGDB select HAVE_ARCH_KGDB
......
...@@ -27,38 +27,18 @@ ...@@ -27,38 +27,18 @@
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/ftrace.h> #include <asm/ftrace.h>
#include <asm/nops.h> #include <asm/nops.h>
#include <asm/nmi.h>
#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE
/*
* modifying_code is set to notify NMIs that they need to use
* memory barriers when entering or exiting. But we don't want
* to burden NMIs with unnecessary memory barriers when code
* modification is not being done (which is most of the time).
*
* A mutex is already held when ftrace_arch_code_modify_prepare
* and post_process are called. No locks need to be taken here.
*
* Stop machine will make sure currently running NMIs are done
* and new NMIs will see the updated variable before we need
* to worry about NMIs doing memory barriers.
*/
static int modifying_code __read_mostly;
static DEFINE_PER_CPU(int, save_modifying_code);
int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_prepare(void)
{ {
set_kernel_text_rw(); set_kernel_text_rw();
set_all_modules_text_rw(); set_all_modules_text_rw();
modifying_code = 1;
return 0; return 0;
} }
int ftrace_arch_code_modify_post_process(void) int ftrace_arch_code_modify_post_process(void)
{ {
modifying_code = 0;
set_all_modules_text_ro(); set_all_modules_text_ro();
set_kernel_text_ro(); set_kernel_text_ro();
return 0; return 0;
...@@ -91,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) ...@@ -91,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code; return calc.code;
} }
/*
* Modifying code must take extra care. On an SMP machine, if
* the code being modified is also being executed on another CPU
* that CPU will have undefined results and possibly take a GPF.
* We use kstop_machine to stop other CPUS from exectuing code.
* But this does not stop NMIs from happening. We still need
* to protect against that. We separate out the modification of
* the code to take care of this.
*
* Two buffers are added: An IP buffer and a "code" buffer.
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
* 2) Wait for any running NMIs to finish and set a flag that says
* we are modifying code, it is done in an atomic operation.
* 3) Write the code
* 4) clear the flag.
* 5) Wait for any running NMIs to finish.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
* and if it is, it will write what is in the IP and "code" buffers.
*
* The trick is, it does not matter if everyone is writing the same
* content to the code location. Also, if a CPU is executing code
* it is OK to write to that code location if the contents being written
* are the same as what exists.
*/
#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
static void *mod_code_ip; /* holds the IP to write to */
static const void *mod_code_newcode; /* holds the text to write to the IP */
static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);
int ftrace_arch_read_dyn_info(char *buf, int size)
{
int r;
r = snprintf(buf, size, "%u %u",
nmi_wait_count,
atomic_read(&nmi_update_count));
return r;
}
static void clear_mod_flag(void)
{
int old = atomic_read(&nmi_running);
for (;;) {
int new = old & ~MOD_CODE_WRITE_FLAG;
if (old == new)
break;
old = atomic_cmpxchg(&nmi_running, old, new);
}
}
static void ftrace_mod_code(void)
{
/*
* Yes, more than one CPU process can be writing to mod_code_status.
* (and the code itself)
* But if one were to fail, then they all should, and if one were
* to succeed, then they all should.
*/
mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
MCOUNT_INSN_SIZE);
/* if we fail, then kill any new writers */
if (mod_code_status)
clear_mod_flag();
}
void ftrace_nmi_enter(void)
{
__this_cpu_write(save_modifying_code, modifying_code);
if (!__this_cpu_read(save_modifying_code))
return;
if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
smp_rmb();
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
/* Must have previous changes seen before executions */
smp_mb();
}
void ftrace_nmi_exit(void)
{
if (!__this_cpu_read(save_modifying_code))
return;
/* Finish all executions before clearing nmi_running */
smp_mb();
atomic_dec(&nmi_running);
}
static void wait_for_nmi_and_set_mod_flag(void)
{
if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
return;
do {
cpu_relax();
} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
nmi_wait_count++;
}
static void wait_for_nmi(void)
{
if (!atomic_read(&nmi_running))
return;
do {
cpu_relax();
} while (atomic_read(&nmi_running));
nmi_wait_count++;
}
static inline int static inline int
within(unsigned long addr, unsigned long start, unsigned long end) within(unsigned long addr, unsigned long start, unsigned long end)
{ {
...@@ -239,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) ...@@ -239,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
if (within(ip, (unsigned long)_text, (unsigned long)_etext)) if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa(ip)); ip = (unsigned long)__va(__pa(ip));
mod_code_ip = (void *)ip; return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
mod_code_newcode = new_code;
/* The buffers need to be visible before we let NMIs write them */
smp_mb();
wait_for_nmi_and_set_mod_flag();
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
smp_mb();
clear_mod_flag();
wait_for_nmi();
return mod_code_status;
} }
static const unsigned char *ftrace_nop_replace(void) static const unsigned char *ftrace_nop_replace(void)
......
...@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); ...@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type]) #define nmi_to_desc(type) (&nmi_desc[type])
static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{ {
struct nmi_desc *desc = nmi_to_desc(type); struct nmi_desc *desc = nmi_to_desc(type);
struct nmiaction *a; struct nmiaction *a;
...@@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) ...@@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
EXPORT_SYMBOL_GPL(unregister_nmi_handler); EXPORT_SYMBOL_GPL(unregister_nmi_handler);
static notrace __kprobes void static __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs) pci_serr_error(unsigned char reason, struct pt_regs *regs)
{ {
pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
...@@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) ...@@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
outb(reason, NMI_REASON_PORT); outb(reason, NMI_REASON_PORT);
} }
static notrace __kprobes void static __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs) io_check_error(unsigned char reason, struct pt_regs *regs)
{ {
unsigned long i; unsigned long i;
...@@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) ...@@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
outb(reason, NMI_REASON_PORT); outb(reason, NMI_REASON_PORT);
} }
static notrace __kprobes void static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs) unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{ {
int handled; int handled;
...@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) ...@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip); static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
static notrace __kprobes void default_do_nmi(struct pt_regs *regs) static __kprobes void default_do_nmi(struct pt_regs *regs)
{ {
unsigned char reason = 0; unsigned char reason = 0;
int handled; int handled;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment