Commit df48d871 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-core-for-linus' of...

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (107 commits)
  perf stat: Add more cache-miss percentage printouts
  perf stat: Add -d -d and -d -d -d options to show more CPU events
  ftrace/kbuild: Add recordmcount files to force full build
  ftrace: Add self-tests for multiple function trace users
  ftrace: Modify ftrace_set_filter/notrace to take ops
  ftrace: Allow dynamically allocated function tracers
  ftrace: Implement separate user function filtering
  ftrace: Free hash with call_rcu_sched()
  ftrace: Have global_ops store the functions that are to be traced
  ftrace: Add ops parameter to ftrace_startup/shutdown functions
  ftrace: Add enabled_functions file
  ftrace: Use counters to enable functions to trace
  ftrace: Separate hash allocation and assignment
  ftrace: Create a global_ops to hold the filter and notrace hashes
  ftrace: Use hash instead for FTRACE_FL_FILTER
  ftrace: Replace FTRACE_FL_NOTRACE flag with a hash of ignored functions
  perf bench, x86: Add alternatives-asm.h wrapper
  x86, 64-bit: Fix copy_[to/from]_user() checks for the userspace address limit
  x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB
  x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB
  ...
parents acd30250 29510ec3
......@@ -1268,6 +1268,7 @@ help:
@echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
@echo ' make C=2 [targets] Force check of all c source with $$CHECK'
@echo ' make W=1 [targets] Enable extra gcc checks'
@echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
@echo ''
@echo 'Execute "make" or "make all" to build all targets marked with [*] '
@echo 'For further info see the ./README file'
......
......@@ -20,16 +20,18 @@
#define WORD_INSN ".word"
#endif
#define JUMP_LABEL(key, label) \
do { \
asm goto("1:\tnop\n\t" \
"nop\n\t" \
".pushsection __jump_table, \"a\"\n\t" \
WORD_INSN " 1b, %l[" #label "], %0\n\t" \
".popsection\n\t" \
: : "i" (key) : : label); \
} while (0)
static __always_inline bool arch_static_branch(struct jump_label_key *key)
{
asm goto("1:\tnop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
".popsection\n\t"
: : "i" (key) : : l_yes);
return false;
l_yes:
return true;
}
#endif /* __KERNEL__ */
......
......@@ -88,6 +88,7 @@ config S390
select HAVE_KERNEL_XZ
select HAVE_GET_USER_PAGES_FAST
select HAVE_ARCH_MUTEX_CPU_RELAX
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
select ARCH_INLINE_SPIN_TRYLOCK
select ARCH_INLINE_SPIN_TRYLOCK_BH
select ARCH_INLINE_SPIN_LOCK
......
......@@ -11,15 +11,13 @@ struct dyn_arch_ftrace { };
#ifdef CONFIG_64BIT
#define MCOUNT_INSN_SIZE 12
#define MCOUNT_OFFSET 8
#else
#define MCOUNT_INSN_SIZE 20
#define MCOUNT_OFFSET 4
#endif
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr - MCOUNT_OFFSET;
return addr;
}
#endif /* __ASSEMBLY__ */
......
#ifndef _ASM_S390_JUMP_LABEL_H
#define _ASM_S390_JUMP_LABEL_H
#include <linux/types.h>
#define JUMP_LABEL_NOP_SIZE 6
#ifdef CONFIG_64BIT
#define ASM_PTR ".quad"
#define ASM_ALIGN ".balign 8"
#else
#define ASM_PTR ".long"
#define ASM_ALIGN ".balign 4"
#endif
static __always_inline bool arch_static_branch(struct jump_label_key *key)
{
asm goto("0: brcl 0,0\n"
".pushsection __jump_table, \"aw\"\n"
ASM_ALIGN "\n"
ASM_PTR " 0b, %l[label], %0\n"
".popsection\n"
: : "X" (key) : : label);
return false;
label:
return true;
}
typedef unsigned long jump_label_t;
struct jump_entry {
jump_label_t code;
jump_label_t target;
jump_label_t key;
};
#endif
......@@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \
processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
vdso.o vtime.o sysinfo.o nmi.o sclp.o
vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
......
/*
* Jump label s390 support
*
* Copyright IBM Corp. 2011
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/stop_machine.h>
#include <linux/jump_label.h>
#include <asm/ipl.h>
#ifdef HAVE_JUMP_LABEL
struct insn {
u16 opcode;
s32 offset;
} __packed;
struct insn_args {
unsigned long *target;
struct insn *insn;
ssize_t size;
};
static int __arch_jump_label_transform(void *data)
{
struct insn_args *args = data;
int rc;
rc = probe_kernel_write(args->target, args->insn, args->size);
WARN_ON_ONCE(rc < 0);
return 0;
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
struct insn_args args;
struct insn insn;
if (type == JUMP_LABEL_ENABLE) {
/* brcl 15,offset */
insn.opcode = 0xc0f4;
insn.offset = (entry->target - entry->code) >> 1;
} else {
/* brcl 0,0 */
insn.opcode = 0xc004;
insn.offset = 0;
}
args.target = (void *) entry->code;
args.insn = &insn;
args.size = JUMP_LABEL_NOP_SIZE;
stop_machine(__arch_jump_label_transform, &args, NULL);
}
#endif
......@@ -7,17 +7,20 @@
#define JUMP_LABEL_NOP_SIZE 4
#define JUMP_LABEL(key, label) \
do { \
asm goto("1:\n\t" \
"nop\n\t" \
"nop\n\t" \
".pushsection __jump_table, \"a\"\n\t"\
".align 4\n\t" \
".word 1b, %l[" #label "], %c0\n\t" \
".popsection \n\t" \
: : "i" (key) : : label);\
} while (0)
static __always_inline bool arch_static_branch(struct jump_label_key *key)
{
asm goto("1:\n\t"
"nop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".align 4\n\t"
".word 1b, %l[l_yes], %c0\n\t"
".popsection \n\t"
: : "i" (key) : : l_yes);
return false;
l_yes:
return true;
}
#endif /* __KERNEL__ */
......
......@@ -15,4 +15,13 @@
.endm
#endif
.macro altinstruction_entry orig alt feature orig_len alt_len
.align 8
.quad \orig
.quad \alt
.word \feature
.byte \orig_len
.byte \alt_len
.endm
#endif /* __ASSEMBLY__ */
......@@ -4,7 +4,6 @@
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <linux/jump_label.h>
#include <asm/asm.h>
/*
......@@ -191,7 +190,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
extern void text_poke_smp_batch(struct text_poke_param *params, int n);
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL)
#define IDEAL_NOP_SIZE_5 5
extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
extern void arch_init_ideal_nop5(void);
......
......@@ -195,6 +195,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
......
......@@ -38,11 +38,10 @@ extern void mcount(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/*
* call mcount is "e8 <4 byte offset>"
* The addr points to the 4 byte offset and the caller of this
* function wants the pointer to e8. Simply subtract one.
* addr is the address of the mcount call instruction.
* recordmcount does the necessary offset calculation.
*/
return addr - 1;
return addr;
}
#ifdef CONFIG_DYNAMIC_FTRACE
......
......@@ -5,20 +5,25 @@
#include <linux/types.h>
#include <asm/nops.h>
#include <asm/asm.h>
#define JUMP_LABEL_NOP_SIZE 5
# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
# define JUMP_LABEL(key, label) \
do { \
asm goto("1:" \
JUMP_LABEL_INITIAL_NOP \
".pushsection __jump_table, \"aw\" \n\t"\
_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
".popsection \n\t" \
: : "i" (key) : : label); \
} while (0)
#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
static __always_inline bool arch_static_branch(struct jump_label_key *key)
{
asm goto("1:"
JUMP_LABEL_INITIAL_NOP
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
".popsection \n\t"
: : "i" (key) : : l_yes);
return false;
l_yes:
return true;
}
#endif /* __KERNEL__ */
......
......@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align);
* executable.)
*/
#define RESERVE_BRK(name,sz) \
static void __section(.discard.text) __used \
static void __section(.discard.text) __used notrace \
__brk_reservation_fn_##name##__(void) { \
asm volatile ( \
".pushsection .brk_reservation,\"aw\",@nobits;" \
......
......@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo,
/* Generic stack tracer with callbacks */
struct stacktrace_ops {
void (*warning)(void *data, char *msg);
/* msg must contain %s for the symbol */
void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
void (*address)(void *data, unsigned long address, int reliable);
/* On negative return stop dumping */
int (*stack)(void *data, char *name);
......
......@@ -42,7 +42,7 @@
* Returns 0 if the range is valid, nonzero otherwise.
*
* This is equivalent to the following test:
* (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
* (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
*
* This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
*/
......
......@@ -210,6 +210,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
u8 insnbuf[MAX_PATCH_LEN];
DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite a previous scanned alternative code.
* Some kernel functions (e.g. memcpy, memset, etc) use this order to
* patch code.
*
* So be careful if you want to change the scan order to any other
* order.
*/
for (a = start; a < end; a++) {
u8 *instr = a->instr;
BUG_ON(a->replacementlen > a->instrlen);
......@@ -679,7 +688,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
}
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL)
#ifdef CONFIG_X86_64
unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
......
......@@ -565,8 +565,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
if (eax > 0)
c->x86_capability[9] = ebx;
c->x86_capability[9] = ebx;
}
/* AMD-defined flags: level 0x80000001 */
......
......@@ -29,10 +29,10 @@
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
/* Unmask CPUID levels if masked: */
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
u64 misc_enable;
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
......@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
* (model 2) with the same problem.
*/
if (c->x86 == 15) {
u64 misc_enable;
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
......@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
}
}
#endif
/*
* If fast string is not enabled in IA32_MISC_ENABLE for any reason,
* clear the fast string and enhanced fast string CPU capabilities.
*/
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
printk(KERN_INFO "Disabled fast string operations\n");
setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
setup_clear_cpu_cap(X86_FEATURE_ERMS);
}
}
}
#ifdef CONFIG_X86_32
......
......@@ -31,6 +31,7 @@
#include <asm/nmi.h>
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>
#if 0
#undef wrmsrl
......@@ -363,12 +364,18 @@ x86_perf_event_update(struct perf_event *event)
return new_raw_count;
}
/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
static inline int x86_pmu_addr_offset(int index)
{
if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
return index << 1;
return index;
int offset;
/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
alternative_io(ASM_NOP2,
"shll $1, %%eax",
X86_FEATURE_PERFCTR_CORE,
"=a" (offset),
"a" (index));
return offset;
}
static inline unsigned int x86_pmu_config_addr(int index)
......@@ -1766,17 +1773,6 @@ static struct pmu pmu = {
* callchain support
*/
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
/* Ignore warnings */
}
static void backtrace_warning(void *data, char *msg)
{
/* Ignore warnings */
}
static int backtrace_stack(void *data, char *name)
{
return 0;
......@@ -1790,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops backtrace_ops = {
.warning = backtrace_warning,
.warning_symbol = backtrace_warning_symbol,
.stack = backtrace_stack,
.address = backtrace_address,
.walk_stack = print_context_stack_bp,
......
......@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
*/
static const u64 amd_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
};
static u64 amd_pmu_event_map(int hw_event)
......
......@@ -36,7 +36,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};
static struct event_constraint intel_core_event_constraints[] =
static struct event_constraint intel_core_event_constraints[] __read_mostly =
{
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
......@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] =
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_core2_event_constraints[] =
static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
......@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] =
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_nehalem_event_constraints[] =
static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
......@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] =
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_nehalem_extra_regs[] =
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
EVENT_EXTRA_END
};
static struct event_constraint intel_nehalem_percore_constraints[] =
static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
{
INTEL_EVENT_CONSTRAINT(0xb7, 0),
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_westmere_event_constraints[] =
static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
......@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_snb_event_constraints[] =
static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
......@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] =
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_westmere_extra_regs[] =
static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
EVENT_EXTRA_END
};
static struct event_constraint intel_westmere_percore_constraints[] =
static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
{
INTEL_EVENT_CONSTRAINT(0xb7, 0),
INTEL_EVENT_CONSTRAINT(0xbb, 0),
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_gen_event_constraints[] =
static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
......@@ -1440,6 +1440,11 @@ static __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
if (ebx & 0x40) {
/*
* Erratum AAJ80 detected, we work it around by using
......@@ -1480,6 +1485,12 @@ static __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
x86_pmu.extra_regs = intel_westmere_extra_regs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
pr_cont("Westmere events, ");
break;
......@@ -1491,6 +1502,12 @@ static __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_events;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
pr_cont("SandyBridge events, ");
break;
......
......@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = {
.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
.escr_emask =
P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_X87_ASSIST] = {
......@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
data.addr = 0;
data.raw = NULL;
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -1197,7 +1196,7 @@ static __init int p4_pmu_init(void)
{
unsigned int low, high;
/* If we get stripped -- indexig fails */
/* If we get stripped -- indexing fails */
BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
rdmsr(MSR_IA32_MISC_ENABLE, low, high);
......
......@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo,
}
EXPORT_SYMBOL_GPL(print_context_stack_bp);
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
printk(data);
print_symbol(msg, symbol);
printk("\n");
}
static void print_trace_warning(void *data, char *msg)
{
printk("%s%s\n", (char *)data, msg);
}
static int print_trace_stack(void *data, char *name)
{
printk("%s <%s> ", (char *)data, name);
......@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
.address = print_trace_address,
.walk_stack = print_context_stack,
......
......@@ -24,6 +24,7 @@
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/jump_label.h>
#include <asm/system.h>
#include <asm/page.h>
......
......@@ -9,15 +9,6 @@
#include <linux/uaccess.h>
#include <asm/stacktrace.h>
static void save_stack_warning(void *data, char *msg)
{
}
static void
save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
{
}
static int save_stack_stack(void *data, char *name)
{
return 0;
......@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops save_stack_ops = {
.warning = save_stack_warning,
.warning_symbol = save_stack_warning_symbol,
.stack = save_stack_stack,
.address = save_stack_address,
.walk_stack = print_context_stack,
};
static const struct stacktrace_ops save_stack_ops_nosched = {
.warning = save_stack_warning,
.warning_symbol = save_stack_warning_symbol,
.stack = save_stack_stack,
.address = save_stack_address_nosched,
.walk_stack = print_context_stack,
......
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>
/*
* Zero a page.
......@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
CFI_ENDPROC
ENDPROC(clear_page_c)
ENTRY(clear_page_c_e)
CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)
ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
......@@ -38,21 +48,26 @@ ENTRY(clear_page)
.Lclear_page_end:
ENDPROC(clear_page)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
/*
* Some CPUs support enhanced REP MOVSB/STOSB instructions.
* It is recommended to use this when possible.
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
* Otherwise, use original function.
*
*/
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2:
2: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
3:
.previous
.section .altinstructions,"a"
.align 8
.quad clear_page
.quad 1b
.word X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
.Lclear_page_end-clear_page, 2b-1b
altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
.Lclear_page_end-clear_page,3b-2b
.previous
......@@ -15,23 +15,30 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
.macro ALTERNATIVE_JUMP feature,orig,alt
/*
* By placing feature2 after feature1 in altinstructions section, we logically
* implement:
* If CPU has feature2, jmp to alt2 is used
* else if CPU has feature1, jmp to alt1 is used
* else jmp to orig is used.
*/
.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt-1b /* offset */ /* or alternatively to alt */
.long \alt1-1b /* offset */ /* or alternatively to alt1 */
3: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt2-1b /* offset */ /* or alternatively to alt2 */
.previous
.section .altinstructions,"a"
.align 8
.quad 0b
.quad 2b
.word \feature /* when feature is set */
.byte 5
.byte 5
altinstruction_entry 0b,2b,\feature1,5,5
altinstruction_entry 0b,3b,\feature2,5,5
.previous
.endm
......@@ -72,8 +79,10 @@ ENTRY(_copy_to_user)
addq %rdx,%rcx
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
jae bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
ja bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_to_user)
......@@ -85,8 +94,10 @@ ENTRY(_copy_from_user)
addq %rdx,%rcx
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
jae bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
ja bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_from_user)
......@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
.previous
CFI_ENDPROC
ENDPROC(copy_user_generic_string)
/*
* Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
* It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_enhanced_fast_string)
CFI_STARTPROC
andl %edx,%edx
jz 2f
movl %edx,%ecx
1: rep
movsb
2: xorl %eax,%eax
ret
.section .fixup,"ax"
12: movl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
.section __ex_table,"a"
.align 8
.quad 1b,12b
.previous
CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
......@@ -4,6 +4,7 @@
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>
/*
* memcpy - Copy a memory block.
......@@ -37,6 +38,23 @@
.Lmemcpy_e:
.previous
/*
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
* memcpy_c. Use memcpy_c_e when possible.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
movq %rdi, %rax
movl %edx, %ecx
rep movsb
ret
.Lmemcpy_e_e:
.previous
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC
......@@ -171,21 +189,22 @@ ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* Some CPUs run faster using the string copy instructions.
* It is also a lot simpler. Use this when possible:
*/
.section .altinstructions, "a"
.align 8
.quad memcpy
.quad .Lmemcpy_c
.word X86_FEATURE_REP_GOOD
/*
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
* If the feature is supported, memcpy_c_e() is the first choice.
* If enhanced rep movsb copy is not available, use fast string copy
* memcpy_c() when possible. This is faster and code is simpler than
* original memcpy().
* Otherwise, original memcpy() is used.
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.byte .Lmemcpy_e - .Lmemcpy_c
.byte .Lmemcpy_e - .Lmemcpy_c
.section .altinstructions, "a"
altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
.previous
......@@ -8,6 +8,7 @@
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#undef memmove
......@@ -24,6 +25,7 @@
*/
ENTRY(memmove)
CFI_STARTPROC
/* Handle more 32bytes in loop */
mov %rdi, %rax
cmp $0x20, %rdx
......@@ -31,8 +33,13 @@ ENTRY(memmove)
/* Decide forward/backward copy mode */
cmp %rdi, %rsi
jb 2f
jge .Lmemmove_begin_forward
mov %rsi, %r8
add %rdx, %r8
cmp %rdi, %r8
jg 2f
.Lmemmove_begin_forward:
/*
* movsq instruction have many startup latency
* so we handle small size by general register.
......@@ -78,6 +85,8 @@ ENTRY(memmove)
rep movsq
movq %r11, (%r10)
jmp 13f
.Lmemmove_end_forward:
/*
* Handle data backward by movsq.
*/
......@@ -194,4 +203,22 @@ ENTRY(memmove)
13:
retq
CFI_ENDPROC
.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
/* Forward moving data. */
movq %rdx, %rcx
rep movsb
retq
.Lmemmove_end_forward_efs:
.previous
.section .altinstructions,"a"
.align 8
.quad .Lmemmove_begin_forward
.quad .Lmemmove_begin_forward_efs
.word X86_FEATURE_ERMS
.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
.previous
ENDPROC(memmove)
......@@ -2,9 +2,13 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
/*
* ISO C memset - set a memory block to a byte value.
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the orignal function as well.
*
* rdi destination
* rsi value (char)
......@@ -31,6 +35,28 @@
.Lmemset_e:
.previous
/*
* ISO C memset - set a memory block to a byte value. This function uses
* enhanced rep stosb to override the fast string function.
* The code is simpler and shorter than the fast string function as well.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c_e:
movq %rdi,%r9
movb %sil,%al
movl %edx,%ecx
rep stosb
movq %r9,%rax
ret
.Lmemset_e_e:
.previous
ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
......@@ -112,16 +138,20 @@ ENTRY(__memset)
ENDPROC(memset)
ENDPROC(__memset)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
* It is recommended to use this when possible.
*
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
* instructions.
*
* Otherwise, use original memset function.
*
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*/
.section .altinstructions,"a"
.align 8
.quad memset
.quad .Lmemset_c
.word X86_FEATURE_REP_GOOD
.byte .Lfinal - memset
.byte .Lmemset_e - .Lmemset_c
altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
.Lfinal-memset,.Lmemset_e-.Lmemset_c
altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
.Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
.previous
......@@ -16,17 +16,6 @@
#include <asm/stacktrace.h>
#include <linux/compat.h>
static void backtrace_warning_symbol(void *data, char *msg,
unsigned long symbol)
{
/* Ignore warnings */
}
static void backtrace_warning(void *data, char *msg)
{
/* Ignore warnings */
}
static int backtrace_stack(void *data, char *name)
{
/* Yes, we want all stacks */
......@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
}
static struct stacktrace_ops backtrace_ops = {
.warning = backtrace_warning,
.warning_symbol = backtrace_warning_symbol,
.stack = backtrace_stack,
.address = backtrace_address,
.walk_stack = print_context_stack,
......
......@@ -170,6 +170,10 @@
STRUCT_ALIGN(); \
*(__tracepoints) \
/* implement dynamic printk debug */ \
. = ALIGN(8); \
VMLINUX_SYMBOL(__start___jump_table) = .; \
*(__jump_table) \
VMLINUX_SYMBOL(__stop___jump_table) = .; \
. = ALIGN(8); \
VMLINUX_SYMBOL(__start___verbose) = .; \
*(__verbose) \
......@@ -228,8 +232,6 @@
\
BUG_TABLE \
\
JUMP_TABLE \
\
/* PCI quirks */ \
.pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
......@@ -589,14 +591,6 @@
#define BUG_TABLE
#endif
#define JUMP_TABLE \
. = ALIGN(8); \
__jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___jump_table) = .; \
*(__jump_table) \
VMLINUX_SYMBOL(__stop___jump_table) = .; \
}
#ifdef CONFIG_PM_TRACE
#define TRACEDATA \
. = ALIGN(4); \
......
#ifndef _DYNAMIC_DEBUG_H
#define _DYNAMIC_DEBUG_H
#include <linux/jump_label.h>
/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
* bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
* use independent hash functions, to reduce the chance of false positives.
......
......@@ -29,9 +29,22 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
struct ftrace_hash;
enum {
FTRACE_OPS_FL_ENABLED = 1 << 0,
FTRACE_OPS_FL_GLOBAL = 1 << 1,
FTRACE_OPS_FL_DYNAMIC = 1 << 2,
};
struct ftrace_ops {
ftrace_func_t func;
struct ftrace_ops *next;
ftrace_func_t func;
struct ftrace_ops *next;
unsigned long flags;
#ifdef CONFIG_DYNAMIC_FTRACE
struct ftrace_hash *notrace_hash;
struct ftrace_hash *filter_hash;
#endif
};
extern int function_trace_stop;
......@@ -146,14 +159,13 @@ extern void unregister_ftrace_function_probe_all(char *glob);
extern int ftrace_text_reserved(void *start, void *end);
enum {
FTRACE_FL_FREE = (1 << 0),
FTRACE_FL_FAILED = (1 << 1),
FTRACE_FL_FILTER = (1 << 2),
FTRACE_FL_ENABLED = (1 << 3),
FTRACE_FL_NOTRACE = (1 << 4),
FTRACE_FL_CONVERTED = (1 << 5),
FTRACE_FL_ENABLED = (1 << 30),
FTRACE_FL_FREE = (1 << 31),
};
#define FTRACE_FL_MASK (0x3UL << 30)
#define FTRACE_REF_MAX ((1 << 30) - 1)
struct dyn_ftrace {
union {
unsigned long ip; /* address of mcount call-site */
......@@ -167,7 +179,12 @@ struct dyn_ftrace {
};
int ftrace_force_update(void);
void ftrace_set_filter(unsigned char *buf, int len, int reset);
void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset);
void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset);
void ftrace_set_global_filter(unsigned char *buf, int len, int reset);
void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
int register_ftrace_command(struct ftrace_func_command *cmd);
int unregister_ftrace_command(struct ftrace_func_command *cmd);
......
......@@ -79,29 +79,29 @@
#define __exitused __used
#endif
#define __exit __section(.exit.text) __exitused __cold
#define __exit __section(.exit.text) __exitused __cold notrace
/* Used for HOTPLUG */
#define __devinit __section(.devinit.text) __cold
#define __devinit __section(.devinit.text) __cold notrace
#define __devinitdata __section(.devinit.data)
#define __devinitconst __section(.devinit.rodata)
#define __devexit __section(.devexit.text) __exitused __cold
#define __devexit __section(.devexit.text) __exitused __cold notrace
#define __devexitdata __section(.devexit.data)
#define __devexitconst __section(.devexit.rodata)
/* Used for HOTPLUG_CPU */
#define __cpuinit __section(.cpuinit.text) __cold
#define __cpuinit __section(.cpuinit.text) __cold notrace
#define __cpuinitdata __section(.cpuinit.data)
#define __cpuinitconst __section(.cpuinit.rodata)
#define __cpuexit __section(.cpuexit.text) __exitused __cold
#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace
#define __cpuexitdata __section(.cpuexit.data)
#define __cpuexitconst __section(.cpuexit.rodata)
/* Used for MEMORY_HOTPLUG */
#define __meminit __section(.meminit.text) __cold
#define __meminit __section(.meminit.text) __cold notrace
#define __meminitdata __section(.meminit.data)
#define __meminitconst __section(.meminit.rodata)
#define __memexit __section(.memexit.text) __exitused __cold
#define __memexit __section(.memexit.text) __exitused __cold notrace
#define __memexitdata __section(.memexit.data)
#define __memexitconst __section(.memexit.rodata)
......
#ifndef _LINUX_JUMP_LABEL_H
#define _LINUX_JUMP_LABEL_H
#include <linux/types.h>
#include <linux/compiler.h>
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
struct jump_label_key {
atomic_t enabled;
struct jump_entry *entries;
#ifdef CONFIG_MODULES
struct jump_label_mod *next;
#endif
};
# include <asm/jump_label.h>
# define HAVE_JUMP_LABEL
#endif
enum jump_label_type {
JUMP_LABEL_DISABLE = 0,
JUMP_LABEL_ENABLE,
JUMP_LABEL_DISABLE
};
struct module;
#ifdef HAVE_JUMP_LABEL
#ifdef CONFIG_MODULES
#define JUMP_LABEL_INIT {{ 0 }, NULL, NULL}
#else
#define JUMP_LABEL_INIT {{ 0 }, NULL}
#endif
static __always_inline bool static_branch(struct jump_label_key *key)
{
return arch_static_branch(key);
}
extern struct jump_entry __start___jump_table[];
extern struct jump_entry __stop___jump_table[];
......@@ -23,37 +46,37 @@ extern void jump_label_unlock(void);
extern void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type);
extern void arch_jump_label_text_poke_early(jump_label_t addr);
extern void jump_label_update(unsigned long key, enum jump_label_type type);
extern void jump_label_apply_nops(struct module *mod);
extern int jump_label_text_reserved(void *start, void *end);
extern void jump_label_inc(struct jump_label_key *key);
extern void jump_label_dec(struct jump_label_key *key);
extern bool jump_label_enabled(struct jump_label_key *key);
extern void jump_label_apply_nops(struct module *mod);
#define jump_label_enable(key) \
jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
#else
#define jump_label_disable(key) \
jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
#include <asm/atomic.h>
#else
#define JUMP_LABEL_INIT {ATOMIC_INIT(0)}
#define JUMP_LABEL(key, label) \
do { \
if (unlikely(*key)) \
goto label; \
} while (0)
struct jump_label_key {
atomic_t enabled;
};
#define jump_label_enable(cond_var) \
do { \
*(cond_var) = 1; \
} while (0)
static __always_inline bool static_branch(struct jump_label_key *key)
{
if (unlikely(atomic_read(&key->enabled)))
return true;
return false;
}
#define jump_label_disable(cond_var) \
do { \
*(cond_var) = 0; \
} while (0)
static inline void jump_label_inc(struct jump_label_key *key)
{
atomic_inc(&key->enabled);
}
static inline int jump_label_apply_nops(struct module *mod)
static inline void jump_label_dec(struct jump_label_key *key)
{
return 0;
atomic_dec(&key->enabled);
}
static inline int jump_label_text_reserved(void *start, void *end)
......@@ -64,16 +87,16 @@ static inline int jump_label_text_reserved(void *start, void *end)
static inline void jump_label_lock(void) {}
static inline void jump_label_unlock(void) {}
#endif
static inline bool jump_label_enabled(struct jump_label_key *key)
{
return !!atomic_read(&key->enabled);
}
#define COND_STMT(key, stmt) \
do { \
__label__ jl_enabled; \
JUMP_LABEL(key, jl_enabled); \
if (0) { \
jl_enabled: \
stmt; \
} \
} while (0)
static inline int jump_label_apply_nops(struct module *mod)
{
return 0;
}
#endif
#endif
#ifndef _LINUX_JUMP_LABEL_REF_H
#define _LINUX_JUMP_LABEL_REF_H
#include <linux/jump_label.h>
#include <asm/atomic.h>
#ifdef HAVE_JUMP_LABEL
static inline void jump_label_inc(atomic_t *key)
{
if (atomic_add_return(1, key) == 1)
jump_label_enable(key);
}
static inline void jump_label_dec(atomic_t *key)
{
if (atomic_dec_and_test(key))
jump_label_disable(key);
}
#else /* !HAVE_JUMP_LABEL */
static inline void jump_label_inc(atomic_t *key)
{
atomic_inc(key);
}
static inline void jump_label_dec(atomic_t *key)
{
atomic_dec(key);
}
#undef JUMP_LABEL
#define JUMP_LABEL(key, label) \
do { \
if (unlikely(__builtin_choose_expr( \
__builtin_types_compatible_p(typeof(key), atomic_t *), \
atomic_read((atomic_t *)(key)), *(key)))) \
goto label; \
} while (0)
#endif /* HAVE_JUMP_LABEL */
#endif /* _LINUX_JUMP_LABEL_REF_H */
......@@ -283,6 +283,7 @@ extern char *get_options(const char *str, int nints, int *ints);
extern unsigned long long memparse(const char *ptr, char **retptr);
extern int core_kernel_text(unsigned long addr);
extern int core_kernel_data(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr);
extern int func_ptr_is_kernel_text(void *ptr);
......
......@@ -2,8 +2,8 @@
* Performance events:
*
* Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
*
* Data type definitions, declarations, prototypes.
*
......@@ -52,6 +52,8 @@ enum perf_hw_id {
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
PERF_COUNT_HW_MAX, /* non-ABI */
};
......@@ -468,9 +470,9 @@ enum perf_callchain_context {
PERF_CONTEXT_MAX = (__u64)-4095,
};
#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
#ifdef __KERNEL__
/*
......@@ -484,9 +486,9 @@ enum perf_callchain_context {
#endif
struct perf_guest_info_callbacks {
int (*is_in_guest) (void);
int (*is_user_mode) (void);
unsigned long (*get_guest_ip) (void);
int (*is_in_guest)(void);
int (*is_user_mode)(void);
unsigned long (*get_guest_ip)(void);
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
......@@ -505,7 +507,7 @@ struct perf_guest_info_callbacks {
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/irq_work.h>
#include <linux/jump_label_ref.h>
#include <linux/jump_label.h>
#include <asm/atomic.h>
#include <asm/local.h>
......@@ -652,19 +654,19 @@ struct pmu {
* Start the transaction, after this ->add() doesn't need to
* do schedulability tests.
*/
void (*start_txn) (struct pmu *pmu); /* optional */
void (*start_txn) (struct pmu *pmu); /* optional */
/*
* If ->start_txn() disabled the ->add() schedulability test
* then ->commit_txn() is required to perform one. On success
* the transaction is closed. On error the transaction is kept
* open until ->cancel_txn() is called.
*/
int (*commit_txn) (struct pmu *pmu); /* optional */
int (*commit_txn) (struct pmu *pmu); /* optional */
/*
* Will cancel the transaction, assumes ->del() is called
* for each successful ->add() during the transaction.
*/
void (*cancel_txn) (struct pmu *pmu); /* optional */
void (*cancel_txn) (struct pmu *pmu); /* optional */
};
/**
......@@ -712,15 +714,15 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
struct pt_regs *regs);
enum perf_group_flag {
PERF_GROUP_SOFTWARE = 0x1,
PERF_GROUP_SOFTWARE = 0x1,
};
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
struct swevent_hlist {
struct hlist_head heads[SWEVENT_HLIST_SIZE];
struct rcu_head rcu_head;
struct hlist_head heads[SWEVENT_HLIST_SIZE];
struct rcu_head rcu_head;
};
#define PERF_ATTACH_CONTEXT 0x01
......@@ -733,13 +735,13 @@ struct swevent_hlist {
* This is a per-cpu dynamically allocated data structure.
*/
struct perf_cgroup_info {
u64 time;
u64 timestamp;
u64 time;
u64 timestamp;
};
struct perf_cgroup {
struct cgroup_subsys_state css;
struct perf_cgroup_info *info; /* timing info, one per cpu */
struct cgroup_subsys_state css;
struct perf_cgroup_info *info; /* timing info, one per cpu */
};
#endif
......@@ -923,7 +925,7 @@ struct perf_event_context {
/*
* Number of contexts where an event can trigger:
* task, softirq, hardirq, nmi.
* task, softirq, hardirq, nmi.
*/
#define PERF_NR_CONTEXTS 4
......@@ -1001,8 +1003,7 @@ struct perf_sample_data {
struct perf_raw_record *raw;
};
static inline
void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
{
data->addr = addr;
data->raw = NULL;
......@@ -1034,13 +1035,12 @@ static inline int is_software_event(struct perf_event *event)
return event->pmu->task_ctx_nr == perf_sw_context;
}
extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
#ifndef perf_arch_fetch_caller_regs
static inline void
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
#endif
/*
......@@ -1063,26 +1063,24 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
struct pt_regs hot_regs;
JUMP_LABEL(&perf_swevent_enabled[event_id], have_event);
return;
have_event:
if (!regs) {
perf_fetch_caller_regs(&hot_regs);
regs = &hot_regs;
if (static_branch(&perf_swevent_enabled[event_id])) {
if (!regs) {
perf_fetch_caller_regs(&hot_regs);
regs = &hot_regs;
}
__perf_sw_event(event_id, nr, nmi, regs, addr);
}
__perf_sw_event(event_id, nr, nmi, regs, addr);
}
extern atomic_t perf_sched_events;
extern struct jump_label_key perf_sched_events;
static inline void perf_event_task_sched_in(struct task_struct *task)
{
COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
if (static_branch(&perf_sched_events))
__perf_event_task_sched_in(task);
}
static inline
void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
{
perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
......@@ -1100,14 +1098,10 @@ extern void perf_event_fork(struct task_struct *tsk);
/* Callchains */
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
extern void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs);
extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs);
extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
static inline void
perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
......@@ -1143,9 +1137,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record,
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
PERF_RECORD_MISC_KERNEL)
#define perf_instruction_pointer(regs) instruction_pointer(regs)
# define perf_misc_flags(regs) \
(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_instruction_pointer(regs) instruction_pointer(regs)
#endif
extern int perf_output_begin(struct perf_output_handle *handle,
......@@ -1180,9 +1174,9 @@ static inline void
perf_bp_event(struct perf_event *event, void *data) { }
static inline int perf_register_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks) { return 0; }
(struct perf_guest_info_callbacks *callbacks) { return 0; }
static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks) { return 0; }
(struct perf_guest_info_callbacks *callbacks) { return 0; }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { }
......@@ -1195,23 +1189,22 @@ static inline void perf_event_disable(struct perf_event *event) { }
static inline void perf_event_task_tick(void) { }
#endif
#define perf_output_put(handle, x) \
perf_output_copy((handle), &(x), sizeof(x))
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
/*
* This has to have a higher priority than migration_notifier in sched.c.
*/
#define perf_cpu_notifier(fn) \
do { \
static struct notifier_block fn##_nb __cpuinitdata = \
{ .notifier_call = fn, .priority = CPU_PRI_PERF }; \
fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_STARTING, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_ONLINE, \
(void *)(unsigned long)smp_processor_id()); \
register_cpu_notifier(&fn##_nb); \
#define perf_cpu_notifier(fn) \
do { \
static struct notifier_block fn##_nb __cpuinitdata = \
{ .notifier_call = fn, .priority = CPU_PRI_PERF }; \
fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_STARTING, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_ONLINE, \
(void *)(unsigned long)smp_processor_id()); \
register_cpu_notifier(&fn##_nb); \
} while (0)
#endif /* __KERNEL__ */
......
......@@ -29,7 +29,7 @@ struct tracepoint_func {
struct tracepoint {
const char *name; /* Tracepoint name */
int state; /* State. */
struct jump_label_key key;
void (*regfunc)(void);
void (*unregfunc)(void);
struct tracepoint_func __rcu *funcs;
......@@ -146,9 +146,7 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin,
extern struct tracepoint __tracepoint_##name; \
static inline void trace_##name(proto) \
{ \
JUMP_LABEL(&__tracepoint_##name.state, do_trace); \
return; \
do_trace: \
if (static_branch(&__tracepoint_##name.key)) \
__DO_TRACE(&__tracepoint_##name, \
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
......@@ -176,14 +174,14 @@ do_trace: \
* structures, so we create an array of pointers that will be used for iteration
* on the tracepoints.
*/
#define DEFINE_TRACE_FN(name, reg, unreg) \
static const char __tpstrtab_##name[] \
__attribute__((section("__tracepoints_strings"))) = #name; \
struct tracepoint __tracepoint_##name \
__attribute__((section("__tracepoints"))) = \
{ __tpstrtab_##name, 0, reg, unreg, NULL }; \
static struct tracepoint * const __tracepoint_ptr_##name __used \
__attribute__((section("__tracepoints_ptrs"))) = \
#define DEFINE_TRACE_FN(name, reg, unreg) \
static const char __tpstrtab_##name[] \
__attribute__((section("__tracepoints_strings"))) = #name; \
struct tracepoint __tracepoint_##name \
__attribute__((section("__tracepoints"))) = \
{ __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\
static struct tracepoint * const __tracepoint_ptr_##name __used \
__attribute__((section("__tracepoints_ptrs"))) = \
&__tracepoint_##name;
#define DEFINE_TRACE(name) \
......
......@@ -21,7 +21,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_perf_event.o = -pg
CFLAGS_REMOVE_irq_work.o = -pg
endif
......@@ -103,8 +102,9 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PERF_EVENTS) += events/
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
......
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_core.o = -pg
endif
obj-y := core.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
......@@ -2,8 +2,8 @@
* Performance events core code:
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
......@@ -39,10 +39,10 @@
#include <asm/irq_regs.h>
struct remote_function_call {
struct task_struct *p;
int (*func)(void *info);
void *info;
int ret;
struct task_struct *p;
int (*func)(void *info);
void *info;
int ret;
};
static void remote_function(void *data)
......@@ -76,10 +76,10 @@ static int
task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
{
struct remote_function_call data = {
.p = p,
.func = func,
.info = info,
.ret = -ESRCH, /* No such (running) process */
.p = p,
.func = func,
.info = info,
.ret = -ESRCH, /* No such (running) process */
};
if (task_curr(p))
......@@ -100,10 +100,10 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
{
struct remote_function_call data = {
.p = NULL,
.func = func,
.info = info,
.ret = -ENXIO, /* No such CPU */
.p = NULL,
.func = func,
.info = info,
.ret = -ENXIO, /* No such CPU */
};
smp_call_function_single(cpu, remote_function, &data, 1);
......@@ -125,7 +125,7 @@ enum event_type_t {
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
atomic_t perf_sched_events __read_mostly;
struct jump_label_key perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static atomic_t nr_mmap_events __read_mostly;
......@@ -5429,7 +5429,7 @@ static int swevent_hlist_get(struct perf_event *event)
return err;
}
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
static void sw_perf_event_destroy(struct perf_event *event)
{
......@@ -7445,11 +7445,11 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
}
struct cgroup_subsys perf_subsys = {
.name = "perf_event",
.subsys_id = perf_subsys_id,
.create = perf_cgroup_create,
.destroy = perf_cgroup_destroy,
.exit = perf_cgroup_exit,
.attach = perf_cgroup_attach,
.name = "perf_event",
.subsys_id = perf_subsys_id,
.create = perf_cgroup_create,
.destroy = perf_cgroup_destroy,
.exit = perf_cgroup_exit,
.attach = perf_cgroup_attach,
};
#endif /* CONFIG_CGROUP_PERF */
......@@ -72,6 +72,14 @@ int core_kernel_text(unsigned long addr)
return 0;
}
int core_kernel_data(unsigned long addr)
{
if (addr >= (unsigned long)_sdata &&
addr < (unsigned long)_edata)
return 1;
return 0;
}
int __kernel_text_address(unsigned long addr)
{
if (core_kernel_text(addr))
......
This diff is collapsed.
This diff is collapsed.
......@@ -2014,9 +2014,10 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
{
enum print_line_t ret;
if (iter->lost_events)
trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
iter->cpu, iter->lost_events);
if (iter->lost_events &&
!trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
iter->cpu, iter->lost_events))
return TRACE_TYPE_PARTIAL_LINE;
if (iter->trace && iter->trace->print_line) {
ret = iter->trace->print_line(iter);
......@@ -3230,6 +3231,14 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
if (iter->seq.len >= cnt)
break;
/*
* Setting the full flag means we reached the trace_seq buffer
* size and we should leave by partial output condition above.
* One of the trace_seq_* functions is not used properly.
*/
WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
iter->ent->type);
}
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
......
......@@ -419,6 +419,8 @@ extern void trace_find_cmdline(int pid, char comm[]);
extern unsigned long ftrace_update_tot_cnt;
#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
extern int DYN_FTRACE_TEST_NAME(void);
#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
extern int DYN_FTRACE_TEST_NAME2(void);
#endif
extern int ring_buffer_expanded;
......
......@@ -149,11 +149,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly =
{
.func = function_trace_call,
.flags = FTRACE_OPS_FL_GLOBAL,
};
static struct ftrace_ops trace_stack_ops __read_mostly =
{
.func = function_stack_trace_call,
.flags = FTRACE_OPS_FL_GLOBAL,
};
/* Our two options */
......
......@@ -153,6 +153,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly =
{
.func = irqsoff_tracer_call,
.flags = FTRACE_OPS_FL_GLOBAL,
};
#endif /* CONFIG_FUNCTION_TRACER */
......
......@@ -830,6 +830,9 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event);
enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
if (!trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type))
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
......
......@@ -32,7 +32,7 @@ static DEFINE_MUTEX(btrace_mutex);
struct trace_bprintk_fmt {
struct list_head list;
char fmt[0];
const char *fmt;
};
static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
......@@ -49,6 +49,7 @@ static
void hold_module_trace_bprintk_format(const char **start, const char **end)
{
const char **iter;
char *fmt;
mutex_lock(&btrace_mutex);
for (iter = start; iter < end; iter++) {
......@@ -58,14 +59,18 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
continue;
}
tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt)
+ strlen(*iter) + 1, GFP_KERNEL);
if (tb_fmt) {
tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL);
if (tb_fmt)
fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
if (tb_fmt && fmt) {
list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
strcpy(tb_fmt->fmt, *iter);
strcpy(fmt, *iter);
tb_fmt->fmt = fmt;
*iter = tb_fmt->fmt;
} else
} else {
kfree(tb_fmt);
*iter = NULL;
}
}
mutex_unlock(&btrace_mutex);
}
......@@ -84,6 +89,76 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
return 0;
}
/*
* The debugfs/tracing/printk_formats file maps the addresses with
* the ASCII formats that are used in the bprintk events in the
* buffer. For userspace tools to be able to decode the events from
* the buffer, they need to be able to map the address with the format.
*
* The addresses of the bprintk formats are in their own section
* __trace_printk_fmt. But for modules we copy them into a link list.
* The code to print the formats and their addresses passes around the
* address of the fmt string. If the fmt address passed into the seq
* functions is within the kernel core __trace_printk_fmt section, then
* it simply uses the next pointer in the list.
*
* When the fmt pointer is outside the kernel core __trace_printk_fmt
* section, then we need to read the link list pointers. The trick is
* we pass the address of the string to the seq function just like
* we do for the kernel core formats. To get back the structure that
* holds the format, we simply use containerof() and then go to the
* next format in the list.
*/
static const char **
find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
{
struct trace_bprintk_fmt *mod_fmt;
if (list_empty(&trace_bprintk_fmt_list))
return NULL;
/*
* v will point to the address of the fmt record from t_next
* v will be NULL from t_start.
* If this is the first pointer or called from start
* then we need to walk the list.
*/
if (!v || start_index == *pos) {
struct trace_bprintk_fmt *p;
/* search the module list */
list_for_each_entry(p, &trace_bprintk_fmt_list, list) {
if (start_index == *pos)
return &p->fmt;
start_index++;
}
/* pos > index */
return NULL;
}
/*
* v points to the address of the fmt field in the mod list
* structure that holds the module print format.
*/
mod_fmt = container_of(v, typeof(*mod_fmt), fmt);
if (mod_fmt->list.next == &trace_bprintk_fmt_list)
return NULL;
mod_fmt = container_of(mod_fmt->list.next, typeof(*mod_fmt), list);
return &mod_fmt->fmt;
}
static void format_mod_start(void)
{
mutex_lock(&btrace_mutex);
}
static void format_mod_stop(void)
{
mutex_unlock(&btrace_mutex);
}
#else /* !CONFIG_MODULES */
__init static int
module_trace_bprintk_format_notify(struct notifier_block *self,
......@@ -91,6 +166,13 @@ module_trace_bprintk_format_notify(struct notifier_block *self,
{
return 0;
}
static inline const char **
find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
{
return NULL;
}
static inline void format_mod_start(void) { }
static inline void format_mod_stop(void) { }
#endif /* CONFIG_MODULES */
......@@ -153,20 +235,33 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
}
EXPORT_SYMBOL_GPL(__ftrace_vprintk);
static const char **find_next(void *v, loff_t *pos)
{
const char **fmt = v;
int start_index;
if (!fmt)
fmt = __start___trace_bprintk_fmt + *pos;
start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
if (*pos < start_index)
return fmt;
return find_next_mod_format(start_index, v, fmt, pos);
}
static void *
t_start(struct seq_file *m, loff_t *pos)
{
const char **fmt = __start___trace_bprintk_fmt + *pos;
if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
return NULL;
return fmt;
format_mod_start();
return find_next(NULL, pos);
}
static void *t_next(struct seq_file *m, void * v, loff_t *pos)
{
(*pos)++;
return t_start(m, pos);
return find_next(v, pos);
}
static int t_show(struct seq_file *m, void *v)
......@@ -205,6 +300,7 @@ static int t_show(struct seq_file *m, void *v)
static void t_stop(struct seq_file *m, void *p)
{
format_mod_stop();
}
static const struct seq_operations show_format_seq_ops = {
......
......@@ -129,6 +129,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly =
{
.func = wakeup_tracer_call,
.flags = FTRACE_OPS_FL_GLOBAL,
};
#endif /* CONFIG_FUNCTION_TRACER */
......
......@@ -101,6 +101,206 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
#ifdef CONFIG_DYNAMIC_FTRACE
static int trace_selftest_test_probe1_cnt;
static void trace_selftest_test_probe1_func(unsigned long ip,
unsigned long pip)
{
trace_selftest_test_probe1_cnt++;
}
static int trace_selftest_test_probe2_cnt;
static void trace_selftest_test_probe2_func(unsigned long ip,
unsigned long pip)
{
trace_selftest_test_probe2_cnt++;
}
static int trace_selftest_test_probe3_cnt;
static void trace_selftest_test_probe3_func(unsigned long ip,
unsigned long pip)
{
trace_selftest_test_probe3_cnt++;
}
static int trace_selftest_test_global_cnt;
static void trace_selftest_test_global_func(unsigned long ip,
unsigned long pip)
{
trace_selftest_test_global_cnt++;
}
static int trace_selftest_test_dyn_cnt;
static void trace_selftest_test_dyn_func(unsigned long ip,
unsigned long pip)
{
trace_selftest_test_dyn_cnt++;
}
static struct ftrace_ops test_probe1 = {
.func = trace_selftest_test_probe1_func,
};
static struct ftrace_ops test_probe2 = {
.func = trace_selftest_test_probe2_func,
};
static struct ftrace_ops test_probe3 = {
.func = trace_selftest_test_probe3_func,
};
static struct ftrace_ops test_global = {
.func = trace_selftest_test_global_func,
.flags = FTRACE_OPS_FL_GLOBAL,
};
static void print_counts(void)
{
printk("(%d %d %d %d %d) ",
trace_selftest_test_probe1_cnt,
trace_selftest_test_probe2_cnt,
trace_selftest_test_probe3_cnt,
trace_selftest_test_global_cnt,
trace_selftest_test_dyn_cnt);
}
static void reset_counts(void)
{
trace_selftest_test_probe1_cnt = 0;
trace_selftest_test_probe2_cnt = 0;
trace_selftest_test_probe3_cnt = 0;
trace_selftest_test_global_cnt = 0;
trace_selftest_test_dyn_cnt = 0;
}
static int trace_selftest_ops(int cnt)
{
int save_ftrace_enabled = ftrace_enabled;
struct ftrace_ops *dyn_ops;
char *func1_name;
char *func2_name;
int len1;
int len2;
int ret = -1;
printk(KERN_CONT "PASSED\n");
pr_info("Testing dynamic ftrace ops #%d: ", cnt);
ftrace_enabled = 1;
reset_counts();
/* Handle PPC64 '.' name */
func1_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
func2_name = "*" __stringify(DYN_FTRACE_TEST_NAME2);
len1 = strlen(func1_name);
len2 = strlen(func2_name);
/*
* Probe 1 will trace function 1.
* Probe 2 will trace function 2.
* Probe 3 will trace functions 1 and 2.
*/
ftrace_set_filter(&test_probe1, func1_name, len1, 1);
ftrace_set_filter(&test_probe2, func2_name, len2, 1);
ftrace_set_filter(&test_probe3, func1_name, len1, 1);
ftrace_set_filter(&test_probe3, func2_name, len2, 0);
register_ftrace_function(&test_probe1);
register_ftrace_function(&test_probe2);
register_ftrace_function(&test_probe3);
register_ftrace_function(&test_global);
DYN_FTRACE_TEST_NAME();
print_counts();
if (trace_selftest_test_probe1_cnt != 1)
goto out;
if (trace_selftest_test_probe2_cnt != 0)
goto out;
if (trace_selftest_test_probe3_cnt != 1)
goto out;
if (trace_selftest_test_global_cnt == 0)
goto out;
DYN_FTRACE_TEST_NAME2();
print_counts();
if (trace_selftest_test_probe1_cnt != 1)
goto out;
if (trace_selftest_test_probe2_cnt != 1)
goto out;
if (trace_selftest_test_probe3_cnt != 2)
goto out;
/* Add a dynamic probe */
dyn_ops = kzalloc(sizeof(*dyn_ops), GFP_KERNEL);
if (!dyn_ops) {
printk("MEMORY ERROR ");
goto out;
}
dyn_ops->func = trace_selftest_test_dyn_func;
register_ftrace_function(dyn_ops);
trace_selftest_test_global_cnt = 0;
DYN_FTRACE_TEST_NAME();
print_counts();
if (trace_selftest_test_probe1_cnt != 2)
goto out_free;
if (trace_selftest_test_probe2_cnt != 1)
goto out_free;
if (trace_selftest_test_probe3_cnt != 3)
goto out_free;
if (trace_selftest_test_global_cnt == 0)
goto out;
if (trace_selftest_test_dyn_cnt == 0)
goto out_free;
DYN_FTRACE_TEST_NAME2();
print_counts();
if (trace_selftest_test_probe1_cnt != 2)
goto out_free;
if (trace_selftest_test_probe2_cnt != 2)
goto out_free;
if (trace_selftest_test_probe3_cnt != 4)
goto out_free;
ret = 0;
out_free:
unregister_ftrace_function(dyn_ops);
kfree(dyn_ops);
out:
/* Purposely unregister in the same order */
unregister_ftrace_function(&test_probe1);
unregister_ftrace_function(&test_probe2);
unregister_ftrace_function(&test_probe3);
unregister_ftrace_function(&test_global);
/* Make sure everything is off */
reset_counts();
DYN_FTRACE_TEST_NAME();
DYN_FTRACE_TEST_NAME();
if (trace_selftest_test_probe1_cnt ||
trace_selftest_test_probe2_cnt ||
trace_selftest_test_probe3_cnt ||
trace_selftest_test_global_cnt ||
trace_selftest_test_dyn_cnt)
ret = -1;
ftrace_enabled = save_ftrace_enabled;
return ret;
}
/* Test dynamic code modification and ftrace filters */
int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
struct trace_array *tr,
......@@ -131,7 +331,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
/* filter only on our function */
ftrace_set_filter(func_name, strlen(func_name), 1);
ftrace_set_global_filter(func_name, strlen(func_name), 1);
/* enable tracing */
ret = tracer_init(trace, tr);
......@@ -166,22 +366,30 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
tracing_start();
/* we should only have one item */
if (!ret && count != 1) {
trace->reset(tr);
printk(KERN_CONT ".. filter failed count=%ld ..", count);
ret = -1;
goto out;
}
/* Test the ops with global tracing running */
ret = trace_selftest_ops(1);
trace->reset(tr);
out:
ftrace_enabled = save_ftrace_enabled;
tracer_enabled = save_tracer_enabled;
/* Enable tracing on all functions again */
ftrace_set_filter(NULL, 0, 1);
ftrace_set_global_filter(NULL, 0, 1);
/* Test the ops with global tracing off */
if (!ret)
ret = trace_selftest_ops(2);
return ret;
}
......
......@@ -5,3 +5,9 @@ int DYN_FTRACE_TEST_NAME(void)
/* used to call mcount */
return 0;
}
int DYN_FTRACE_TEST_NAME2(void)
{
/* used to call mcount */
return 0;
}
......@@ -133,6 +133,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly =
{
.func = stack_trace_call,
.flags = FTRACE_OPS_FL_GLOBAL,
};
static ssize_t
......
......@@ -251,9 +251,9 @@ static void set_tracepoint(struct tracepoint_entry **entry,
{
WARN_ON(strcmp((*entry)->name, elem->name) != 0);
if (elem->regfunc && !elem->state && active)
if (elem->regfunc && !jump_label_enabled(&elem->key) && active)
elem->regfunc();
else if (elem->unregfunc && elem->state && !active)
else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active)
elem->unregfunc();
/*
......@@ -264,13 +264,10 @@ static void set_tracepoint(struct tracepoint_entry **entry,
* is used.
*/
rcu_assign_pointer(elem->funcs, (*entry)->funcs);
if (!elem->state && active) {
jump_label_enable(&elem->state);
elem->state = active;
} else if (elem->state && !active) {
jump_label_disable(&elem->state);
elem->state = active;
}
if (active && !jump_label_enabled(&elem->key))
jump_label_inc(&elem->key);
else if (!active && jump_label_enabled(&elem->key))
jump_label_dec(&elem->key);
}
/*
......@@ -281,13 +278,11 @@ static void set_tracepoint(struct tracepoint_entry **entry,
*/
static void disable_tracepoint(struct tracepoint *elem)
{
if (elem->unregfunc && elem->state)
if (elem->unregfunc && jump_label_enabled(&elem->key))
elem->unregfunc();
if (elem->state) {
jump_label_disable(&elem->state);
elem->state = 0;
}
if (jump_label_enabled(&elem->key))
jump_label_dec(&elem->key);
rcu_assign_pointer(elem->funcs, NULL);
}
......
......@@ -244,14 +244,19 @@ endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
ifdef BUILD_C_RECORDMCOUNT
ifeq ("$(origin RECORDMCOUNT_WARN)", "command line")
RECORDMCOUNT_FLAGS = -w
endif
# Due to recursion, we must skip empty.o.
# The empty.o file is created in the make process in order to determine
# the target endianness and word size. It is made before all other C
# files, including recordmcount.
sub_cmd_record_mcount = \
if [ $(@) != "scripts/mod/empty.o" ]; then \
$(objtree)/scripts/recordmcount "$(@)"; \
$(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)"; \
fi;
recordmcount_source := $(srctree)/scripts/recordmcount.c \
$(srctree)/scripts/recordmcount.h
else
sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
"$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
......@@ -259,6 +264,7 @@ sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH
"$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
"$(LD)" "$(NM)" "$(RM)" "$(MV)" \
"$(if $(part-of-module),1,0)" "$(@)";
recordmcount_source := $(srctree)/scripts/recordmcount.pl
endif
cmd_record_mcount = \
if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \
......@@ -279,13 +285,13 @@ define rule_cc_o_c
endef
# Built-in and composite module parts
$(obj)/%.o: $(src)/%.c FORCE
$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(call cmd,force_checksrc)
$(call if_changed_rule,cc_o_c)
# Single-part modules are special since we need to mark them in $(MODVERDIR)
$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE
$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(call cmd,force_checksrc)
$(call if_changed_rule,cc_o_c)
@{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod)
......
This diff is collapsed.
This diff is collapsed.
......@@ -134,6 +134,7 @@ my %text_sections = (
".sched.text" => 1,
".spinlock.text" => 1,
".irqentry.text" => 1,
".kprobes.text" => 1,
".text.unlikely" => 1,
);
......@@ -222,6 +223,7 @@ if ($arch eq "x86_64") {
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
$type = ".quad";
$alignment = 8;
$mcount_adjust = -1;
# force flags for this arch
$ld .= " -m elf_x86_64";
......@@ -231,6 +233,7 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "i386") {
$alignment = 4;
$mcount_adjust = -1;
# force flags for this arch
$ld .= " -m elf_i386";
......@@ -240,12 +243,14 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "s390" && $bits == 32) {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$";
$mcount_adjust = -4;
$alignment = 4;
$ld .= " -m elf_s390";
$cc .= " -m31";
} elsif ($arch eq "s390" && $bits == 64) {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
$mcount_adjust = -8;
$alignment = 8;
$type = ".quad";
$ld .= " -m elf64_s390";
......
......@@ -113,13 +113,61 @@ OPTIONS
Do various checks like samples ordering and lost events.
-f::
--fields
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, sym. Field
list must be prepended with the type, trace, sw or hw,
list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,sym and -f trace:time,cpu,trace
perf script -f <fields>
is equivalent to:
perf script -f trace:<fields> -f sw:<fields> -f hw:<fields>
i.e., the specified fields apply to all event types if the type string
is not given.
The arguments are processed in the order received. A later usage can
reset a prior request. e.g.:
-f trace: -f comm,tid,time,sym
The first -f suppresses trace events (field list is ""), but then the
second invocation sets the fields to comm,tid,time,sym. In this case a
warning is given to the user:
"Overriding previous field request for all events."
Alternativey, consider the order:
-f comm,tid,time,sym -f trace:
The first -f sets the fields for all events and the second -f
suppresses trace events. The user is given a warning message about
the override, and the result of the above is that only S/W and H/W
events are displayed with the given fields.
For the 'wildcard' option if a user selected field is invalid for an
event type, a message is displayed to the user that the option is
ignored for that type. For example:
$ perf script -f comm,tid,trace
'trace' not valid for hardware events. Ignoring.
'trace' not valid for software events. Ignoring.
Alternatively, if the type is given an invalid field is specified it
is an error. For example:
perf script -v -f sw:comm,tid,trace
'trace' not valid for software events.
At this point usage is displayed, and perf-script exits.
Finally, a user may not set fields to none for all event types.
i.e., -f "" is not allowed.
-k::
--vmlinux=<file>::
vmlinux pathname
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment