Commit 4913ae39 authored by Ingo Molnar

Merge branch 'tip/perf/core' of...

Merge branch 'tip/perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace into perf/core

Pull tracing updates from Steve Rostedt.

This commit:

      tracing: Remove the extra 4 bytes of padding in events

changes the ABI. All involved parties seem to agree that it's safe to
do now, but the devil is in the details ...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents ff7532ca 0b07436d
......@@ -76,6 +76,15 @@ config OPTPROBES
depends on KPROBES && HAVE_OPTPROBES
depends on !PREEMPT
config KPROBES_ON_FTRACE
def_bool y
depends on KPROBES && HAVE_KPROBES_ON_FTRACE
depends on DYNAMIC_FTRACE_WITH_REGS
help
If function tracer is enabled and the arch supports full
passing of pt_regs to function tracing, then kprobes can
optimize on top of function tracing.
config UPROBES
bool "Transparent user-space probes (EXPERIMENTAL)"
depends on UPROBE_EVENT && PERF_EVENTS
......@@ -158,6 +167,9 @@ config HAVE_KRETPROBES
config HAVE_OPTPROBES
bool
config HAVE_KPROBES_ON_FTRACE
bool
config HAVE_NMI_WATCHDOG
bool
#
......
......@@ -40,10 +40,12 @@ config X86
select HAVE_DMA_CONTIGUOUS if !SWIOTLB
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
......
......@@ -44,7 +44,6 @@
#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
#endif
#ifndef __ASSEMBLY__
......
......@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
......
#
# Makefile for kernel probes
#
obj-$(CONFIG_KPROBES) += core.o
obj-$(CONFIG_OPTPROBES) += opt.o
obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
......@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
return addr;
}
#endif
/*
 * skip_singlestep() lets the breakpoint handler bypass real single-stepping
 * for a probe. A non-zero return means the step was skipped; 0 means the
 * caller still has to set up single-stepping itself.
 */
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
#else
/* Stub for !CONFIG_KPROBES_ON_FTRACE: never skips, always returns 0. */
static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
return 0;
}
#endif
#endif
......@@ -58,7 +58,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
#include "kprobes-common.h"
#include "common.h"
void jprobe_return_end(void);
......@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
* Groups, and some special opcodes can not boost.
* This is non-const and volatile to keep gcc from statically
* optimizing it out, as variable_test_bit makes gcc think only
* *(unsigned long*) is used.
* *(unsigned long*) is used.
*/
static volatile u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
......@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
struct __arch_relative_insn {
u8 op;
s32 raddr;
} __attribute__((packed)) *insn;
} __packed *insn;
insn = (struct __arch_relative_insn *)from;
insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
......@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
return 1;
}
#ifdef KPROBES_CAN_USE_FTRACE
static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
/*
* Emulate singlestep (and also recover regs->ip)
* as if there is a 5byte nop
*/
regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
if (unlikely(p->post_handler)) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
__this_cpu_write(current_kprobe, NULL);
}
#endif
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
......@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
#ifdef KPROBES_CAN_USE_FTRACE
if (kprobe_ftrace(p)) {
skip_singlestep(p, regs, kcb);
return 1;
}
#endif
setup_singlestep(p, regs, kcb, 0);
if (!skip_singlestep(p, regs, kcb))
setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
......@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
#ifdef KPROBES_CAN_USE_FTRACE
/* Ftrace callback handler for kprobes */
void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
unsigned long flags;
/* Disable irq for emulating a breakpoint and avoiding preempt */
local_irq_save(flags);
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto end;
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
} else {
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs))
skip_singlestep(p, regs, kcb);
/*
* If pre_handler returns !0, it sets regs->ip and
* resets current kprobe.
*/
}
end:
local_irq_restore(flags);
}
int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
{
p->ainsn.insn = NULL;
p->ainsn.boostable = -1;
return 0;
}
#endif
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();
......
/*
* Dynamic Ftrace based Kprobes Optimization
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) Hitachi Ltd., 2012
*/
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/ftrace.h>
#include "common.h"
static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
/*
* Emulate singlestep (and also recover regs->ip)
* as if there is a 5byte nop
*/
regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
if (unlikely(p->post_handler)) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
__this_cpu_write(current_kprobe, NULL);
return 1;
}
/*
 * Skip single-stepping for a probe that is armed through ftrace.
 *
 * Returns 0 when @p is not an ftrace-based kprobe; otherwise hands off to
 * __skip_singlestep() and returns its result.
 */
int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			      struct kprobe_ctlblk *kcb)
{
	if (!kprobe_ftrace(p))
		return 0;

	return __skip_singlestep(p, regs, kcb);
}
/*
 * Ftrace callback handler for kprobes.
 *
 * Looks up the kprobe registered at @ip and, with interrupts disabled,
 * runs its pre_handler as if a breakpoint had been hit there. If another
 * kprobe is already running on this CPU the hit is only counted as missed.
 */
void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
				     struct ftrace_ops *ops, struct pt_regs *regs)
{
	struct kprobe_ctlblk *ctlblk;
	struct kprobe *probe;
	unsigned long irq_flags;

	/* Disable irq for emulating a breakpoint and avoiding preempt */
	local_irq_save(irq_flags);

	probe = get_kprobe((kprobe_opcode_t *)ip);
	if (unlikely(!probe) || kprobe_disabled(probe))
		goto out;

	ctlblk = get_kprobe_ctlblk();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(probe);
		goto out;
	}

	/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
	regs->ip = ip + sizeof(kprobe_opcode_t);

	__this_cpu_write(current_kprobe, probe);
	ctlblk->kprobe_status = KPROBE_HIT_ACTIVE;

	/*
	 * If pre_handler returns !0, it sets regs->ip and
	 * resets current kprobe, so there is nothing left to emulate.
	 */
	if (probe->pre_handler && probe->pre_handler(probe, regs))
		goto out;

	__skip_singlestep(probe, regs, ctlblk);
out:
	local_irq_restore(irq_flags);
}
/*
 * Arch-side preparation of an ftrace-based kprobe: clears ainsn.insn and
 * sets ainsn.boostable to -1. Always returns 0.
 */
int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
{
	p->ainsn.boostable = -1;
	p->ainsn.insn = NULL;

	return 0;
}
......@@ -37,7 +37,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
#include "kprobes-common.h"
#include "common.h"
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
......
......@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
* SAVE_REGS - The ftrace_ops wants regs saved at each function called
* and passed to the callback. If this flag is set, but the
* architecture does not support passing regs
* (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
* (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
* ftrace_ops will fail to register, unless the next flag
* is set.
* SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
......@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
#endif
#ifndef FTRACE_REGS_ADDR
#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
#else
# define FTRACE_REGS_ADDR FTRACE_ADDR
......@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
*/
extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/**
* ftrace_modify_call - convert from one addr to another (no nop)
* @rec: the mcount call site record
......
......@@ -49,7 +49,6 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
int padding;
};
#define FTRACE_MAX_EVENT \
......@@ -272,7 +271,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
extern int trace_add_event_call(struct ftrace_event_call *call);
extern void trace_remove_event_call(struct ftrace_event_call *call);
#define is_signed_type(type) (((type)(-1)) < 0)
#define is_signed_type(type) (((type)(-1)) < (type)0)
int trace_set_clr_event(const char *system, const char *event, int set);
......
......@@ -180,10 +180,10 @@ extern void irq_exit(void);
#define nmi_enter() \
do { \
lockdep_off(); \
ftrace_nmi_enter(); \
BUG_ON(in_nmi()); \
add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
lockdep_off(); \
rcu_nmi_enter(); \
trace_hardirq_enter(); \
} while (0)
......@@ -192,10 +192,10 @@ extern void irq_exit(void);
do { \
trace_hardirq_exit(); \
rcu_nmi_exit(); \
lockdep_on(); \
BUG_ON(!in_nmi()); \
sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
ftrace_nmi_exit(); \
lockdep_on(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
......@@ -49,16 +49,6 @@
#define KPROBE_REENTER 0x00000004
#define KPROBE_HIT_SSDONE 0x00000008
/*
* If function tracer is enabled and the arch supports full
* passing of pt_regs to function tracing, then kprobes can
* optimize on top of function tracing.
*/
#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
&& defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
# define KPROBES_CAN_USE_FTRACE
#endif
/* Attach to insert probes on any functions which should be ignored*/
#define __kprobes __attribute__((__section__(".kprobes.text")))
......@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
#endif
#endif /* CONFIG_OPTPROBES */
#ifdef KPROBES_CAN_USE_FTRACE
#ifdef CONFIG_KPROBES_ON_FTRACE
extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs);
extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
......
......@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
}
#endif /* CONFIG_OPTPROBES */
#ifdef KPROBES_CAN_USE_FTRACE
#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
.func = kprobe_ftrace_handler,
.flags = FTRACE_OPS_FL_SAVE_REGS,
......@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
(unsigned long)p->addr, 1, 0);
WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
}
#else /* !KPROBES_CAN_USE_FTRACE */
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
#define arm_kprobe_ftrace(p) do {} while (0)
#define disarm_kprobe_ftrace(p) do {} while (0)
......@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
*/
ftrace_addr = ftrace_location((unsigned long)p->addr);
if (ftrace_addr) {
#ifdef KPROBES_CAN_USE_FTRACE
#ifdef CONFIG_KPROBES_ON_FTRACE
/* Given address is not on the instruction boundary */
if ((unsigned long)p->addr != ftrace_addr)
return -EILSEQ;
p->flags |= KPROBE_FLAG_FTRACE;
#else /* !KPROBES_CAN_USE_FTRACE */
#else /* !CONFIG_KPROBES_ON_FTRACE */
return -EINVAL;
#endif
}
......
......@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
help
See Documentation/trace/ftrace-design.txt
config HAVE_DYNAMIC_FTRACE_WITH_REGS
bool
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
......@@ -434,6 +437,11 @@ config DYNAMIC_FTRACE
were made. If so, it runs stop_machine (stops all CPUS)
and modifies the code to jump over the call to ftrace.
config DYNAMIC_FTRACE_WITH_REGS
def_bool y
depends on DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER
......
......@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
return;
local_irq_save(flags);
buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
buf = this_cpu_ptr(bt->msg_data);
va_start(args, fmt);
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
......
......@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
#endif
/*
 * Helpers for traversing an ftrace_ops list such as ftrace_global_list,
 * invoking all entries. They are used as a pair:
 *
 *	do_for_each_ftrace_op(op, list) {
 *		...
 *	} while_for_each_ftrace_op(op);
 *
 * The reason that we can use rcu_dereference_raw() is that elements removed
 * from this list are simply leaked, so there is no need to interact with a
 * grace-period mechanism. The rcu_dereference_raw() calls are needed to
 * handle concurrent insertions into the list.
 *
 * Note that do_for_each_ftrace_op() expands to an assignment followed by
 * "do", i.e. two statements, so it must not be used as the unbraced body of
 * an if/else or loop. Because this is a do/while, the body runs at least
 * once, with op set to the list head.
 *
 * Silly Alpha and silly pointer-speculation compiler optimizations!
 */
#define do_for_each_ftrace_op(op, list) \
op = rcu_dereference_raw(list); \
do
/*
 * Advance op to op->next and keep looping while it is non-NULL and is not
 * &ftrace_list_end.
 *
 * Optimized for just a single item in the list (as that is the normal case).
 */
#define while_for_each_ftrace_op(op) \
while (likely(op = rcu_dereference_raw((op)->next)) && \
unlikely((op) != &ftrace_list_end))
/**
* ftrace_nr_registered_ops - return number of ops registered
*
......@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
return cnt;
}
/*
* Traverse the ftrace_global_list, invoking all entries. The reason that we
* can use rcu_dereference_raw() is that elements removed from this list
* are simply leaked, so there is no need to interact with a grace-period
* mechanism. The rcu_dereference_raw() calls are needed to handle
* concurrent insertions into the ftrace_global_list.
*
* Silly Alpha and silly pointer-speculation compiler optimizations!
*/
static void
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs)
{
if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
int bit;
bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
if (bit < 0)
return;
trace_recursion_set(TRACE_GLOBAL_BIT);
op = rcu_dereference_raw(ftrace_global_list); /*see above*/
while (op != &ftrace_list_end) {
do_for_each_ftrace_op(op, ftrace_global_list) {
op->func(ip, parent_ip, op, regs);
op = rcu_dereference_raw(op->next); /*see above*/
};
trace_recursion_clear(TRACE_GLOBAL_BIT);
} while_for_each_ftrace_op(op);
trace_clear_recursion(bit);
}
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
......@@ -221,10 +233,24 @@ static void update_global_ops(void)
* registered callers.
*/
if (ftrace_global_list == &ftrace_list_end ||
ftrace_global_list->next == &ftrace_list_end)
ftrace_global_list->next == &ftrace_list_end) {
func = ftrace_global_list->func;
else
/*
* As we are calling the function directly.
* If it does not have recursion protection,
* the function_trace_op needs to be updated
* accordingly.
*/
if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
else
global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
} else {
func = ftrace_global_list_func;
/* The list has its own recursion protection. */
global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
}
/* If we filter on pids, update to use the pid function */
if (!list_empty(&ftrace_pids)) {
......@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
return -EINVAL;
#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/*
* If the ftrace_ops specifies SAVE_REGS, then it only can be used
* if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
......@@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
*/
preempt_disable_notrace();
trace_recursion_set(TRACE_CONTROL_BIT);
op = rcu_dereference_raw(ftrace_control_list);
while (op != &ftrace_list_end) {
do_for_each_ftrace_op(op, ftrace_control_list) {
if (!ftrace_function_local_disabled(op) &&
ftrace_ops_test(op, ip))
op->func(ip, parent_ip, op, regs);
op = rcu_dereference_raw(op->next);
};
} while_for_each_ftrace_op(op);
trace_recursion_clear(TRACE_CONTROL_BIT);
preempt_enable_notrace();
}
......@@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ignored, struct pt_regs *regs)
{
struct ftrace_ops *op;
int bit;
if (function_trace_stop)
return;
if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
trace_recursion_set(TRACE_INTERNAL_BIT);
/*
* Some of the ops may be dynamically allocated,
* they must be freed after a synchronize_sched().
*/
preempt_disable_notrace();
op = rcu_dereference_raw(ftrace_ops_list);
while (op != &ftrace_list_end) {
do_for_each_ftrace_op(op, ftrace_ops_list) {
if (ftrace_ops_test(op, ip))
op->func(ip, parent_ip, op, regs);
op = rcu_dereference_raw(op->next);
};
} while_for_each_ftrace_op(op);
preempt_enable_notrace();
trace_recursion_clear(TRACE_INTERNAL_BIT);
trace_clear_recursion(bit);
}
/*
......@@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
* Archs are to support both the regs and ftrace_ops at the same time.
* If they support ftrace_ops, it is assumed they support regs.
* If call backs want to use regs, they must either check for regs
* being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
* Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
* being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
* Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
* An architecture can pass partial regs with ftrace_ops and still
* set ARCH_SUPPORTS_FTRACE_OPS.
*/
......
......@@ -3,8 +3,10 @@
*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/ftrace_event.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
......@@ -21,7 +23,6 @@
#include <linux/fs.h>
#include <asm/local.h>
#include "trace.h"
static void update_pages_handler(struct work_struct *work);
......@@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
#ifdef CONFIG_TRACING
#define TRACE_RECURSIVE_DEPTH 16
/*
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
* by the current task between lock and unlock. But it can
* be modified more than once via an interrupt. To pass this
* information from the lock to the unlock without having to
* access the 'in_interrupt()' functions again (which do show
* a bit of overhead in something as critical as function tracing),
* we use a bitmask trick.
*
* bit 0 = NMI context
* bit 1 = IRQ context
* bit 2 = SoftIRQ context
* bit 3 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
* context.
*
* When the context is determined, the corresponding bit is
* checked and set (if it was set, then a recursion of that context
* happened).
*
* On unlock, we need to clear this bit. To do so, just subtract
* 1 from the current_context and AND it to itself.
*
* (binary)
* 101 - 1 = 100
* 101 & 100 = 100 (clearing bit zero)
*
* 1010 - 1 = 1001
* 1010 & 1001 = 1000 (clearing bit 1)
*
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
*/
static DEFINE_PER_CPU(unsigned int, current_context);
/* Keep this code out of the fast path cache */
static noinline void trace_recursive_fail(void)
static __always_inline int trace_recursive_lock(void)
{
/* Disable all tracing before we do anything else */
tracing_off_permanent();
unsigned int val = this_cpu_read(current_context);
int bit;
printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
"HC[%lu]:SC[%lu]:NMI[%lu]\n",
trace_recursion_buffer(),
hardirq_count() >> HARDIRQ_SHIFT,
softirq_count() >> SOFTIRQ_SHIFT,
in_nmi());
WARN_ON_ONCE(1);
}
static inline int trace_recursive_lock(void)
{
trace_recursion_inc();
if (in_interrupt()) {
if (in_nmi())
bit = 0;
else if (in_irq())
bit = 1;
else
bit = 2;
} else
bit = 3;
if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
return 0;
if (unlikely(val & (1 << bit)))
return 1;
trace_recursive_fail();
val |= (1 << bit);
this_cpu_write(current_context, val);
return -1;
return 0;
}
static inline void trace_recursive_unlock(void)
static __always_inline void trace_recursive_unlock(void)
{
WARN_ON_ONCE(!trace_recursion_buffer());
unsigned int val = this_cpu_read(current_context);
trace_recursion_dec();
val--;
val &= this_cpu_read(current_context);
this_cpu_write(current_context, val);
}
#else
......@@ -3425,7 +3461,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
/* check for end of page padding */
if ((iter->head >= rb_page_size(iter->head_page)) &&
(iter->head_page != cpu_buffer->commit_page))
rb_advance_iter(iter);
rb_inc_iter(iter);
}
static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
......
......@@ -709,10 +709,14 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
if (!current_trace->use_max_tr) {
WARN_ON_ONCE(1);
/* If we disabled the tracer, stop now */
if (current_trace == &nop_trace)
return;
}
if (WARN_ON_ONCE(!current_trace->use_max_tr))
return;
arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
......@@ -922,6 +926,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
{
struct ring_buffer *buffer = tr->buffer;
if (!buffer)
return;
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
......@@ -936,6 +943,9 @@ void tracing_reset_online_cpus(struct trace_array *tr)
struct ring_buffer *buffer = tr->buffer;
int cpu;
if (!buffer)
return;
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
......@@ -1167,7 +1177,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
entry->padding = 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
......@@ -1517,7 +1526,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
static char *get_trace_buf(void)
{
struct trace_buffer_struct *percpu_buffer;
struct trace_buffer_struct *buffer;
/*
* If we have allocated per cpu buffers, then we do not
......@@ -1535,9 +1543,7 @@ static char *get_trace_buf(void)
if (!percpu_buffer)
return NULL;
buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
return buffer->buffer;
return this_cpu_ptr(&percpu_buffer->buffer[0]);
}
static int alloc_percpu_trace_buffer(void)
......@@ -3183,6 +3189,7 @@ static int tracing_set_tracer(const char *buf)
static struct trace_option_dentry *topts;
struct trace_array *tr = &global_trace;
struct tracer *t;
bool had_max_tr;
int ret = 0;
mutex_lock(&trace_types_lock);
......@@ -3209,7 +3216,19 @@ static int tracing_set_tracer(const char *buf)
trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
if (current_trace && current_trace->use_max_tr) {
had_max_tr = current_trace && current_trace->use_max_tr;
current_trace = &nop_trace;
if (had_max_tr && !t->use_max_tr) {
/*
* We need to make sure that the update_max_tr sees that
* current_trace changed to nop_trace to keep it from
* swapping the buffers after we resize it.
* The update_max_tr is called from interrupts disabled
* so a synchronize_sched() is sufficient.
*/
synchronize_sched();
/*
* We don't free the ring buffer; instead, we resize it because
* the max_tr ring buffer has some state (e.g. ring->clock) and
......@@ -3220,10 +3239,8 @@ static int tracing_set_tracer(const char *buf)
}
destroy_trace_option_files(topts);
current_trace = &nop_trace;
topts = create_trace_option_files(t);
if (t->use_max_tr) {
if (t->use_max_tr && !had_max_tr) {
/* we need to make per cpu buffer sizes equivalent */
ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
RING_BUFFER_ALL_CPUS);
......@@ -4037,8 +4054,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
* Reset the buffer so that it doesn't have incomparable timestamps.
*/
tracing_reset_online_cpus(&global_trace);
if (max_tr.buffer)
tracing_reset_online_cpus(&max_tr);
tracing_reset_online_cpus(&max_tr);
mutex_unlock(&trace_types_lock);
......
......@@ -291,16 +291,57 @@ struct tracer {
/* Only current can touch trace_recursion */
#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
/* Ring buffer has the 10 LSB bits to count */
#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
/* for function tracing recursion */
#define TRACE_INTERNAL_BIT (1<<11)
#define TRACE_GLOBAL_BIT (1<<12)
#define TRACE_CONTROL_BIT (1<<13)
/*
* For function tracing recursion:
* The order of these bits is important.
*
* When function tracing occurs, the following steps are made:
* If arch does not support a ftrace feature:
* call internal function (uses INTERNAL bits) which calls...
* If callback is registered to the "global" list, the list
* function is called and recursion checks the GLOBAL bits.
* then this function calls...
* The function callback, which can use the FTRACE bits to
* check for recursion.
*
* Now if the arch does not support a feature, and it calls
* the global list function which calls the ftrace callback
* all three of these steps will do a recursion protection.
* There's no reason to do one if the previous caller already
* did. The recursion that we are protecting against will
* go through the same steps again.
*
* To prevent the multiple recursion checks, if a recursion
* bit is set that is higher than the MAX bit of the current
* check, then we know that the check was made by the previous
* caller, and we can skip the current check.
*/
enum {
TRACE_BUFFER_BIT,
TRACE_BUFFER_NMI_BIT,
TRACE_BUFFER_IRQ_BIT,
TRACE_BUFFER_SIRQ_BIT,
/* Start of function recursion bits */
TRACE_FTRACE_BIT,
TRACE_FTRACE_NMI_BIT,
TRACE_FTRACE_IRQ_BIT,
TRACE_FTRACE_SIRQ_BIT,
/* GLOBAL_BITs must be greater than FTRACE_BITs */
TRACE_GLOBAL_BIT,
TRACE_GLOBAL_NMI_BIT,
TRACE_GLOBAL_IRQ_BIT,
TRACE_GLOBAL_SIRQ_BIT,
/* INTERNAL_BITs must be greater than GLOBAL_BITs */
TRACE_INTERNAL_BIT,
TRACE_INTERNAL_NMI_BIT,
TRACE_INTERNAL_IRQ_BIT,
TRACE_INTERNAL_SIRQ_BIT,
TRACE_CONTROL_BIT,
/*
* Abuse of the trace_recursion.
......@@ -309,11 +350,77 @@ struct tracer {
* was called in irq context but we have irq tracing off. Since this
* can only be modified by current, we can reuse trace_recursion.
*/
#define TRACE_IRQ_BIT (1<<13)
TRACE_IRQ_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit)))
/*
 * Every recursion group (FTRACE, GLOBAL, INTERNAL) owns TRACE_CONTEXT_BITS
 * consecutive bits, one per value returned by trace_get_context_bit().
 */
#define TRACE_CONTEXT_BITS 4
/*
 * For each group, *_START is the index of its first bit and *_MAX is a mask
 * of every bit from bit 0 up to and including the group's last bit.
 */
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT
#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
#define TRACE_LIST_START TRACE_INTERNAL_BIT
#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
/* The mask of the highest group doubles as the mask of all context bits. */
#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
static __always_inline int trace_get_context_bit(void)
{
int bit;
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
if (in_interrupt()) {
if (in_nmi())
bit = 0;
else if (in_irq())
bit = 1;
else
bit = 2;
} else
bit = 3;
return bit;
}
/*
 * Test for recursion in the current context and, if there is none, mark
 * the context as busy in current->trace_recursion.
 *
 * @start: index of the first bit of the caller's recursion group
 * @max:   mask of all bits up to and including the caller's group
 *
 * Returns:
 *    0  - a bit above @max is already set, i.e. a previous caller already
 *         did the recursion check; nothing was set here
 *   -1  - the bit for this context is already set: recursion detected
 *   >0  - the index of the bit that was set; hand it back to
 *         trace_clear_recursion() when done
 */
static __always_inline int trace_test_and_set_recursion(int start, int max)
{
unsigned int val = current->trace_recursion;
int bit;
/* A previous recursion check was made */
if ((val & TRACE_CONTEXT_MASK) > max)
return 0;
/* Pick the bit for the current context within the caller's group. */
bit = trace_get_context_bit() + start;
if (unlikely(val & (1 << bit)))
return -1;
val |= 1 << bit;
current->trace_recursion = val;
/* Compiler barrier placed after the store of the updated mask. */
barrier();
return bit;
}
/*
 * Release a recursion bit handed out by trace_test_and_set_recursion().
 *
 * A @bit of 0 means no bit was taken by the matching test-and-set call,
 * so there is nothing to clear.
 */
static __always_inline void trace_clear_recursion(int bit)
{
	unsigned int cleared;

	if (bit == 0)
		return;

	cleared = current->trace_recursion & ~(1 << bit);
	/* Compiler barrier before the mask is written back. */
	barrier();
	current->trace_recursion = cleared;
}
#define TRACE_PIPE_ALL_CPU -1
......
......@@ -21,8 +21,6 @@
#include <linux/ktime.h>
#include <linux/trace_clock.h>
#include "trace.h"
/*
* trace_clock_local(): the simplest and least coherent tracing clock.
*
......
......@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
__common_field(int, padding);
return ret;
}
......
......@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(tr);
}
static void
function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
struct trace_array *tr = func_trace;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
int pc;
if (unlikely(!ftrace_function_enabled))
return;
pc = preempt_count();
preempt_disable_notrace();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
trace_function(tr, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
preempt_enable_notrace();
}
/* Our option */
enum {
TRACE_FUNC_OPT_STACK = 0x1,
......@@ -85,34 +57,34 @@ static struct tracer_flags func_flags;
/*
 * Function tracer callback: records ip/parent_ip through trace_function().
 * (This hunk shows removed and added lines interleaved; only the type of
 * "bit" is corrected here.)
 */
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
struct trace_array *tr = func_trace;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
/*
 * Must be signed: trace_test_and_set_recursion() returns -1 on recursion,
 * and an unsigned variable would make the "bit < 0" test always false.
 */
int bit;
int cpu;
int pc;
if (unlikely(!ftrace_function_enabled))
return;
/*
* Need to use raw, since this must be called before the
* recursive protection is performed.
*/
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
pc = preempt_count();
preempt_disable_notrace();
if (likely(disabled == 1)) {
pc = preempt_count();
bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
if (bit < 0)
goto out;
cpu = smp_processor_id();
data = tr->data[cpu];
if (!atomic_read(&data->disabled)) {
local_save_flags(flags);
trace_function(tr, ip, parent_ip, flags, pc);
}
trace_clear_recursion(bit);
atomic_dec(&data->disabled);
local_irq_restore(flags);
out:
preempt_enable_notrace();
}
static void
......@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
if (trace_flags & TRACE_ITER_PREEMPTONLY)
trace_ops.func = function_trace_call_preempt_only;
else
trace_ops.func = function_trace_call;
if (func_flags.val & TRACE_FUNC_OPT_STACK)
register_ftrace_function(&trace_stack_ops);
else
......
......@@ -47,6 +47,8 @@ struct fgraph_data {
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
#define TRACE_GRAPH_PRINT_IRQS 0x40
static unsigned int max_depth;
static struct tracer_opt trace_opts[] = {
/* Display overruns? (for self-debug purpose) */
{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
......@@ -250,8 +252,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
return 0;
/* trace it when it is-nested-in or is a function enabled. */
if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
ftrace_graph_ignore_irqs())
if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
ftrace_graph_ignore_irqs()) ||
(max_depth && trace->depth >= max_depth))
return 0;
local_irq_save(flags);
......@@ -1457,6 +1460,59 @@ static struct tracer graph_trace __read_mostly = {
#endif
};
/*
 * Write handler for the "max_graph_depth" file.
 *
 * Parses a base-10 unsigned integer from the user buffer and stores it in
 * max_depth. A value of 0 turns the depth check in trace_graph_entry()
 * off, since that check is skipped when max_depth is zero.
 *
 * Returns @cnt on success, or the error code from the parser.
 */
static ssize_t
graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	unsigned int val;
	int ret;

	/*
	 * Parse directly into the width of max_depth. Going through an
	 * unsigned long would silently truncate large values on 64-bit
	 * when they are assigned to the unsigned int below.
	 */
	ret = kstrtouint_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	max_depth = val;

	*ppos += cnt;

	return cnt;
}
/*
 * Read handler for the "max_graph_depth" file.
 *
 * Formats the current max_depth as a decimal number followed by a newline
 * and copies it to user space through simple_read_from_buffer().
 */
static ssize_t
graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
		 loff_t *ppos)
{
	char buf[15]; /* More than enough to hold UINT_MAX + "\n" */
	int n;

	/*
	 * max_depth is unsigned, so print it with %u: with %d any value
	 * above INT_MAX would be shown as a negative number.
	 */
	n = snprintf(buf, sizeof(buf), "%u\n", max_depth);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
}
/* File operations backing the "max_graph_depth" control file. */
static const struct file_operations graph_depth_fops = {
	.open		= tracing_open_generic,
	.read		= graph_depth_read,
	.write		= graph_depth_write,
	.llseek		= generic_file_llseek,
};
/*
 * Create the "max_graph_depth" file in the tracing directory at boot.
 *
 * Always returns 0: when the tracing directory is not available the file
 * is simply not created.
 */
static __init int init_graph_debugfs(void)
{
	struct dentry *tracing_dir = tracing_init_dentry();

	if (tracing_dir)
		trace_create_file("max_graph_depth", 0644, tracing_dir,
				  NULL, &graph_depth_fops);

	return 0;
}
fs_initcall(init_graph_debugfs);
static __init int init_graph_trace(void)
{
max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
......
......@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
* The ftrace infrastructure should provide the recursion
* protection. If not, this will crash the kernel!
*/
trace_selftest_recursion_cnt++;
if (trace_selftest_recursion_cnt++ > 10)
return;
DYN_FTRACE_TEST_NAME();
}
......@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void)
char *func_name;
int len;
int ret;
int cnt;
/* The previous test PASSED */
pr_cont("PASSED\n");
......@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_recsafe_probe);
/*
* If arch supports all ftrace features, and no other task
* was on the list, we should be fine.
*/
if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
cnt = 2; /* Should have recursed */
else
cnt = 1;
ret = -1;
if (trace_selftest_recursion_cnt != cnt) {
pr_cont("*callback not called expected %d times (%d)* ",
cnt, trace_selftest_recursion_cnt);
if (trace_selftest_recursion_cnt != 2) {
pr_cont("*callback not called expected 2 times (%d)* ",
trace_selftest_recursion_cnt);
goto out;
}
......@@ -568,7 +559,7 @@ trace_selftest_function_regs(void)
int ret;
int supported = 0;
#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
supported = 1;
#endif
......
......@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
return syscalls_metadata[nr];
}
enum print_line_t
static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
......@@ -130,7 +130,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
return TRACE_TYPE_HANDLED;
}
enum print_line_t
static enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
......@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
return ret;
}
void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
......@@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
......@@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
int reg_event_syscall_enter(struct ftrace_event_call *call)
static int reg_event_syscall_enter(struct ftrace_event_call *call)
{
int ret = 0;
int num;
......@@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
return ret;
}
void unreg_event_syscall_enter(struct ftrace_event_call *call)
static void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
int num;
......@@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
mutex_unlock(&syscall_trace_lock);
}
int reg_event_syscall_exit(struct ftrace_event_call *call)
static int reg_event_syscall_exit(struct ftrace_event_call *call)
{
int ret = 0;
int num;
......@@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
return ret;
}
void unreg_event_syscall_exit(struct ftrace_event_call *call)
static void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
int num;
......@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
return (unsigned long)sys_call_table[nr];
}
int __init init_ftrace_syscalls(void)
static int __init init_ftrace_syscalls(void)
{
struct syscall_metadata *meta;
unsigned long addr;
......
......@@ -258,6 +258,10 @@ static int create_trace_uprobe(int argc, char **argv)
goto fail_address_parse;
inode = igrab(path.dentry->d_inode);
if (!S_ISREG(inode->i_mode)) {
ret = -EINVAL;
goto fail_address_parse;
}
argc -= 2;
argv += 2;
......@@ -356,7 +360,7 @@ static int create_trace_uprobe(int argc, char **argv)
if (inode)
iput(inode);
pr_info("Failed to parse address.\n");
pr_info("Failed to parse address or file.\n");
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment