Commit 6f696eb1 authored by Linus Torvalds

Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (57 commits)
  x86, perf events: Check if we have APIC enabled
  perf_event: Fix variable initialization in other codepaths
  perf kmem: Fix unused argument build warning
  perf symbols: perf_header__read_build_ids() offset'n'size should be u64
  perf symbols: dsos__read_build_ids() should read both user and kernel buildids
  perf tools: Align long options which have no short forms
  perf kmem: Show usage if no option is specified
  sched: Mark sched_clock() as notrace
  perf sched: Add max delay time snapshot
  perf tools: Correct size given to memset
  perf_event: Fix perf_swevent_hrtimer() variable initialization
  perf sched: Fix for getting task's execution time
  tracing/kprobes: Fix field creation's bad error handling
  perf_event: Cleanup for cpu_clock_perf_event_update()
  perf_event: Allocate children's perf_event_ctxp at the right time
  perf_event: Clean up __perf_event_init_context()
  hw-breakpoints: Modify breakpoints without unregistering them
  perf probe: Update perf-probe document
  perf probe: Support --del option
  trace-kprobe: Support delete probe syntax
  ...
parents c4e194e3 12558038
@@ -187,8 +187,8 @@ config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
 config X86_DECODER_SELFTEST
 	bool "x86 instruction decoder selftest"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && KPROBES
 	---help---
 	 Perform x86 instruction decoder selftests at build time.
 	 This option is useful for checking the sanity of x86 instruction
......
@@ -1632,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
 	data.period	= event->hw.last_period;
 	data.addr	= 0;
+	data.raw	= NULL;
 	regs.ip		= 0;
 
 	/*
@@ -1749,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
 	u64 val;
 
 	data.addr = 0;
+	data.raw = NULL;
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1794,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	u64 ack, status;
 
 	data.addr = 0;
+	data.raw = NULL;
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1857,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	u64 val;
 
 	data.addr = 0;
+	data.raw = NULL;
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
@@ -2062,12 +2066,6 @@ static __init int p6_pmu_init(void)
 
 	x86_pmu = p6_pmu;
 
-	if (!cpu_has_apic) {
-		pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
-		pr_info("no hardware sampling interrupt available.\n");
-		x86_pmu.apic = 0;
-	}
-
 	return 0;
 }
@@ -2159,6 +2157,16 @@ static __init int amd_pmu_init(void)
 	return 0;
 }
 
+static void __init pmu_check_apic(void)
+{
+	if (cpu_has_apic)
+		return;
+
+	x86_pmu.apic = 0;
+	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+	pr_info("no hardware sampling interrupt available.\n");
+}
+
 void __init init_hw_perf_events(void)
 {
 	int err;
@@ -2180,6 +2188,8 @@ void __init init_hw_perf_events(void)
 		return;
 	}
 
+	pmu_check_apic();
+
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
 	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
@@ -2287,7 +2297,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_nmi_frame);
+static DEFINE_PER_CPU(int, in_ignored_frame);
 
 static void
@@ -2303,8 +2313,9 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	per_cpu(in_nmi_frame, smp_processor_id()) =
-		x86_is_stack_id(NMI_STACK, name);
+	per_cpu(in_ignored_frame, smp_processor_id()) =
+		x86_is_stack_id(NMI_STACK, name) ||
+		x86_is_stack_id(DEBUG_STACK, name);
 
 	return 0;
 }
@@ -2313,7 +2324,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
-	if (per_cpu(in_nmi_frame, smp_processor_id()))
+	if (per_cpu(in_ignored_frame, smp_processor_id()))
 		return;
 
 	if (reliable)
......
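A side note on the repeated data.raw initializations above: struct perf_sample_data lives on the handler's stack, so every field the output path consumes must be set explicitly before the sample is emitted. A minimal sketch of the invariant these hunks enforce (names as in the hunks; the overflow call itself is illustrative):

	struct perf_sample_data data;

	/* stack storage is uninitialized: set every field the output path reads */
	data.addr = 0;
	data.raw  = NULL;	/* a stale ->raw pointer would be chased when the
				 * sample is written to the ring buffer */
	data.period = event->hw.last_period;

	perf_event_overflow(event, 1, &data, regs);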
@@ -103,6 +103,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 	return NULL;
 }
 
+static inline int
+in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
+	     unsigned long *irq_stack_end)
+{
+	return (stack >= irq_stack && stack < irq_stack_end);
+}
+
+/*
+ * We are returning from the irq stack and go to the previous one.
+ * If the previous stack is also in the irq stack, then bp in the first
+ * frame of the irq stack points to the previous, interrupted one.
+ * Otherwise we have another level of indirection: We first save
+ * the bp of the previous stack, then we switch the stack to the irq one
+ * and save a new bp that links to the previous one.
+ * (See save_args())
+ */
+static inline unsigned long
+fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
+		  unsigned long *irq_stack, unsigned long *irq_stack_end)
+{
+#ifdef CONFIG_FRAME_POINTER
+	struct stack_frame *frame = (struct stack_frame *)bp;
+
+	if (!in_irq_stack(stack, irq_stack, irq_stack_end))
+		return (unsigned long)frame->next_frame;
+#endif
+	return bp;
+}
+
 /*
  * x86-64 can have up to three kernel stacks:
  * process stack
@@ -175,7 +204,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			irq_stack = irq_stack_end -
 				(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
 
-			if (stack >= irq_stack && stack < irq_stack_end) {
+			if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
 				if (ops->stack(data, "IRQ") < 0)
 					break;
 				bp = print_context_stack(tinfo, stack, bp,
@@ -186,6 +215,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 				 * pointer (index -1 to end) in the IRQ stack:
 				 */
 				stack = (unsigned long *) (irq_stack_end[-1]);
+				bp = fixup_bp_irq_link(bp, stack, irq_stack,
+						       irq_stack_end);
 				irq_stack_end = NULL;
 				ops->stack(data, "EOI");
 				continue;
......
@@ -1076,10 +1076,10 @@ ENTRY(\sym)
 	TRACE_IRQS_OFF
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
-	PER_CPU(init_tss, %rbp)
-	subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+	PER_CPU(init_tss, %r12)
+	subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
 	call \do_sym
-	addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+	addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 END(\sym)
......
@@ -362,8 +362,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 		return ret;
 	}
 
-	if (bp->callback)
-		ret = arch_store_info(bp);
+	ret = arch_store_info(bp);
 
 	if (ret < 0)
 		return ret;
@@ -519,7 +518,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 			break;
 	}
 
-	(bp->callback)(bp, args->regs);
+	perf_bp_event(bp, args->regs);
 
 	rcu_read_unlock();
 }
......
@@ -555,7 +555,9 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
-static void ptrace_triggered(struct perf_event *bp, void *data)
+static void ptrace_triggered(struct perf_event *bp, int nmi,
+			     struct perf_sample_data *data,
+			     struct pt_regs *regs)
 {
 	int i;
 	struct thread_struct *thread = &(current->thread);
@@ -593,13 +595,13 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 	return dr7;
 }
 
-static struct perf_event *
+static int
 ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
 			 struct task_struct *tsk, int disabled)
 {
 	int err;
 	int gen_len, gen_type;
-	DEFINE_BREAKPOINT_ATTR(attr);
+	struct perf_event_attr attr;
 
 	/*
	 * We should have at least an inactive breakpoint at this
@@ -607,18 +609,18 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
	 * written the address register first
	 */
 	if (!bp)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	attr = bp->attr;
 	attr.bp_len = gen_len;
 	attr.bp_type = gen_type;
 	attr.disabled = disabled;
 
-	return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+	return modify_user_hw_breakpoint(bp, &attr);
 }
 
 /*
@@ -656,28 +658,17 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 				if (!second_pass)
 					continue;
 
-				thread->ptrace_bps[i] = NULL;
-				bp = ptrace_modify_breakpoint(bp, len, type,
+				rc = ptrace_modify_breakpoint(bp, len, type,
 							      tsk, 1);
-				if (IS_ERR(bp)) {
-					rc = PTR_ERR(bp);
-					thread->ptrace_bps[i] = NULL;
+				if (rc)
 					break;
-				}
-				thread->ptrace_bps[i] = bp;
 			}
 			continue;
 		}
 
-		bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
-
-		/* Incorrect bp, or we have a bug in bp API */
-		if (IS_ERR(bp)) {
-			rc = PTR_ERR(bp);
-			thread->ptrace_bps[i] = NULL;
+		rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+		if (rc)
 			break;
-		}
-		thread->ptrace_bps[i] = bp;
 	}
 	/*
	 * Make a second pass to free the remaining unused breakpoints
@@ -721,9 +712,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 {
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
-	DEFINE_BREAKPOINT_ATTR(attr);
+	struct perf_event_attr attr;
 
 	if (!t->ptrace_bps[nr]) {
+		hw_breakpoint_init(&attr);
 		/*
		 * Put stub len and type to register (reserve) an inactive but
		 * correct bp
@@ -734,26 +726,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 		attr.disabled = 1;
 
 		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+
+		/*
+		 * CHECKME: the previous code returned -EIO if the addr wasn't
+		 * a valid task virtual addr. The new one will return -EINVAL in
+		 * this case.
+		 * -EINVAL may be what we want for in-kernel breakpoints users,
+		 * but -EIO looks better for ptrace, since we refuse a register
+		 * writing for the user. And anyway this is the previous
+		 * behaviour.
+		 */
+		if (IS_ERR(bp))
+			return PTR_ERR(bp);
+
+		t->ptrace_bps[nr] = bp;
 	} else {
+		int err;
+
 		bp = t->ptrace_bps[nr];
-		t->ptrace_bps[nr] = NULL;
 
 		attr = bp->attr;
 		attr.bp_addr = addr;
-		bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+		err = modify_user_hw_breakpoint(bp, &attr);
+		if (err)
+			return err;
 	}
-	/*
-	 * CHECKME: the previous code returned -EIO if the addr wasn't a
-	 * valid task virtual addr. The new one will return -EINVAL in this
-	 * case.
-	 * -EINVAL may be what we want for in-kernel breakpoints users, but
-	 * -EIO looks better for ptrace, since we refuse a register writing
-	 * for the user. And anyway this is the previous behaviour.
-	 */
-	if (IS_ERR(bp))
-		return PTR_ERR(bp);
-
-	t->ptrace_bps[nr] = bp;
-
 	return 0;
 }
......
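The net effect on the ptrace side, condensed into one sketch (only calls shown in this diff; new_addr is illustrative, error handling trimmed): a slot is reserved once with a disabled dummy breakpoint, then retargeted in place on every later write.

	struct perf_event_attr attr;
	struct perf_event *bp;

	/* first write: reserve an inactive but correct breakpoint */
	hw_breakpoint_init(&attr);
	attr.bp_addr = addr;
	attr.bp_len = HW_BREAKPOINT_LEN_1;
	attr.bp_type = HW_BREAKPOINT_W;
	attr.disabled = 1;

	bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
	if (IS_ERR(bp))
		return PTR_ERR(bp);

	/* later writes: modify the same event instead of re-registering */
	attr = bp->attr;
	attr.bp_addr = new_addr;
	return modify_user_hw_breakpoint(bp, &attr);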
@@ -5,7 +5,7 @@
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
 inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
 quiet_cmd_inat_tables = GEN     $@
-      cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+      cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
 
 $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
 	$(call cmd,inat_tables)
@@ -20,7 +20,7 @@ lib-y := delay.o
 lib-y += thunk_$(BITS).o
 lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
-lib-y += insn.o inat.o
+lib-$(CONFIG_KPROBES) += insn.o inat.o
 
 obj-y += msr-reg.o msr-reg-export.o
......
@@ -113,7 +113,7 @@ int main(int argc, char **argv)
 	char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
 	unsigned char insn_buf[16];
 	struct insn insn;
-	int insns = 0, c;
+	int insns = 0;
 	int warnings = 0;
 
 	parse_args(argc, argv);
......
@@ -20,19 +20,18 @@ enum {
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
-/* As it's for in-kernel or ptrace use, we want it to be pinned */
-#define DEFINE_BREAKPOINT_ATTR(name)	\
-struct perf_event_attr name = {		\
-	.type = PERF_TYPE_BREAKPOINT,	\
-	.size = sizeof(name),		\
-	.pinned = 1,			\
-};
-
 static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 {
+	memset(attr, 0, sizeof(*attr));
+
 	attr->type = PERF_TYPE_BREAKPOINT;
 	attr->size = sizeof(*attr);
+	/*
+	 * As it's for in-kernel or ptrace use, we want it to be pinned
+	 * and to call its callback every hits.
+	 */
 	attr->pinned = 1;
+	attr->sample_period = 1;
 }
 
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
@@ -52,27 +51,24 @@ static inline int hw_breakpoint_len(struct perf_event *bp)
 
 extern struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk);
 
 /* FIXME: only change from the attr, and don't unregister */
-extern struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp,
-			  struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk);
+extern int
+modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
 
 /*
  * Kernel breakpoints are not associated with any particular thread.
  */
 extern struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
-				perf_callback_t triggered,
+				perf_overflow_handler_t triggered,
 				int cpu);
 
 extern struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered);
+			    perf_overflow_handler_t triggered);
 
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
@@ -93,20 +89,18 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 
 static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)	{ return NULL; }
-static inline struct perf_event *
+static inline int
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk)	{ return NULL; }
+			  struct perf_event_attr *attr)	{ return NULL; }
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
-				perf_callback_t triggered,
+				perf_overflow_handler_t triggered,
 				int cpu)		{ return NULL; }
 static inline struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered)	{ return NULL; }
+			    perf_overflow_handler_t triggered)	{ return NULL; }
 static inline int
 register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
 static inline int
......
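Every breakpoint callback now uses the shared perf overflow signature rather than the old perf_callback_t. A conforming handler is just this (function name and body are illustrative; trace_ksym.c and the sample module further down follow the same shape):

	static void my_bp_handler(struct perf_event *bp, int nmi,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
	{
		/* regs arrives typed instead of being smuggled through void *data */
		printk(KERN_INFO "breakpoint at 0x%llx hit\n",
		       (unsigned long long)bp->attr.bp_addr);
	}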
@@ -18,10 +18,6 @@
 #include <linux/ioctl.h>
 #include <asm/byteorder.h>
 
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
-#include <asm/hw_breakpoint.h>
-#endif
-
 /*
  * User-space ABI bits:
  */
@@ -215,12 +211,12 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	union {
-		struct { /* Hardware breakpoint info */
-			__u64	bp_addr;
-			__u32	bp_type;
-			__u32	bp_len;
-		};
+	struct { /* Hardware breakpoint info */
+		__u64		bp_addr;
+		__u32		bp_type;
+		__u32		bp_len;
+		__u64		__bp_reserved_1;
+		__u64		__bp_reserved_2;
 	};
 
 	__u32			__reserved_2;
@@ -451,6 +447,10 @@ enum perf_callchain_context {
 # include <asm/perf_event.h>
 #endif
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/rculist.h>
@@ -565,10 +565,12 @@ struct perf_pending_entry {
 	void (*func)(struct perf_pending_entry *);
 };
 
-typedef void (*perf_callback_t)(struct perf_event *, void *);
-
 struct perf_sample_data;
 
+typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
+					struct perf_sample_data *,
+					struct pt_regs *regs);
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -660,18 +662,12 @@ struct perf_event {
 	struct pid_namespace		*ns;
 	u64				id;
 
-	void (*overflow_handler)(struct perf_event *event,
-				 int nmi, struct perf_sample_data *data,
-				 struct pt_regs *regs);
+	perf_overflow_handler_t		overflow_handler;
 
 #ifdef CONFIG_EVENT_PROFILE
 	struct event_filter		*filter;
 #endif
 
-	perf_callback_t			callback;
-
-	perf_callback_t			event_callback;
-
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -781,7 +777,7 @@ extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				int cpu,
 				pid_t pid,
-				perf_callback_t callback);
+				perf_overflow_handler_t callback);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
@@ -876,6 +872,8 @@ extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
 extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
+extern void perf_event_enable(struct perf_event *event);
+extern void perf_event_disable(struct perf_event *event);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -906,7 +904,8 @@ static inline void perf_event_fork(struct task_struct *tsk)	{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)	{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)	{ }
-
+static inline void perf_event_enable(struct perf_event *event)		{ }
+static inline void perf_event_disable(struct perf_event *event)	{ }
 #endif
 
 #define perf_output_put(handle, x) \
......
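With the typedef in place, a kernel user hands its handler straight to counter creation; the breakpoint wrappers are thin veneers over this call. A minimal sketch against the declarations above (attr setup elided, my_bp_handler as sketched earlier):

	struct perf_event *event;

	event = perf_event_create_kernel_counter(&attr, -1 /* any cpu */,
						 task->pid, my_bp_handler);
	if (IS_ERR(event))
		return PTR_ERR(event);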
@@ -1840,7 +1840,8 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 extern int sched_clock_stable;
 #endif
 
-extern unsigned long long sched_clock(void);
+/* ftrace calls sched_clock() directly */
+extern unsigned long long notrace sched_clock(void);
 
 extern void sched_clock_init(void);
 extern u64 sched_clock_cpu(int cpu);
......
@@ -52,7 +52,7 @@
 static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
 static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
 static unsigned int max_task_bp_pinned(int cpu)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 
 	for (i = HBP_NUM -1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
+static int task_bp_pinned(struct task_struct *tsk)
+{
+	struct perf_event_context *ctx = tsk->perf_event_ctxp;
+	struct list_head *list;
+	struct perf_event *bp;
+	unsigned long flags;
+	int count = 0;
+
+	if (WARN_ONCE(!ctx, "No perf context for this task"))
+		return 0;
+
+	list = &ctx->event_list;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * The current breakpoint counter is not included in the list
+	 * at the open() callback time
+	 */
+	list_for_each_entry(bp, list, event_entry) {
+		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+			count++;
+	}
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	return count;
+}
+
 /*
  * Report the number of pinned/un-pinned breakpoints we have in
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
-static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+static void
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 {
+	int cpu = bp->cpu;
+	struct task_struct *tsk = bp->ctx->task;
+
 	if (cpu >= 0) {
 		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
-		slots->pinned += max_task_bp_pinned(cpu);
+		if (!tsk)
+			slots->pinned += max_task_bp_pinned(cpu);
+		else
+			slots->pinned += task_bp_pinned(tsk);
 		slots->flexible = per_cpu(nr_bp_flexible, cpu);
 
 		return;
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
 		unsigned int nr;
 
 		nr = per_cpu(nr_cpu_bp_pinned, cpu);
-		nr += max_task_bp_pinned(cpu);
+		if (!tsk)
+			nr += max_task_bp_pinned(cpu);
+		else
+			nr += task_bp_pinned(tsk);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
 */
 static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
 {
-	int count = 0;
-	struct perf_event *bp;
-	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	unsigned int *tsk_pinned;
-	struct list_head *list;
-	unsigned long flags;
-
-	if (WARN_ONCE(!ctx, "No perf context for this task"))
-		return;
-
-	list = &ctx->event_list;
-
-	spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * The current breakpoint counter is not included in the list
-	 * at the open() callback time
-	 */
-	list_for_each_entry(bp, list, event_entry) {
-		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
-	}
+	int count = 0;
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	count = task_bp_pinned(tsk);
 
-	if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
-		return;
-
-	tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 	if (enable) {
 		tsk_pinned[count]++;
 		if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *   - If attached to a single cpu, check:
  *
  *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
- *           + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *           + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
  *
  *   -> If there are already non-pinned counters in this cpu, it means
  *      there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *   - If attached to every cpus, check:
  *
  *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
- *           + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *           + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 *   -> This is roughly the same, except we check the number of per cpu
 *      bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
- *            + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 *   -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *      one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to every cpus, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
- *            + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
 int reserve_bp_slot(struct perf_event *bp)
 {
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
 
 	mutex_lock(&nr_bp_mutex);
 
-	fetch_bp_busy_slots(&slots, bp->cpu);
+	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
 	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
 
 }
 
-int __register_perf_hw_breakpoint(struct perf_event *bp)
+int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
 
@@ -276,19 +292,12 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
	 * This is a quick hack that will be removed soon, once we remove
	 * the tmp breakpoints from ptrace
	 */
-	if (!bp->attr.disabled || bp->callback == perf_bp_event)
+	if (!bp->attr.disabled || !bp->overflow_handler)
 		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
 	return ret;
 }
 
-int register_perf_hw_breakpoint(struct perf_event *bp)
-{
-	bp->callback = perf_bp_event;
-
-	return __register_perf_hw_breakpoint(bp);
-}
-
 /**
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
  * @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 */
 struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)
 {
 	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 * @triggered: callback to trigger when we hit the breakpoint
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
-struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk)
+int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
-	/*
-	 * FIXME: do it without unregistering
-	 * - We don't want to lose our slot
-	 * - If the new bp is incorrect, don't lose the older one
-	 */
-	unregister_hw_breakpoint(bp);
+	u64 old_addr = bp->attr.bp_addr;
+	int old_type = bp->attr.bp_type;
+	int old_len = bp->attr.bp_len;
+	int err = 0;
 
-	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+	perf_event_disable(bp);
+
+	bp->attr.bp_addr = attr->bp_addr;
+	bp->attr.bp_type = attr->bp_type;
+	bp->attr.bp_len = attr->bp_len;
+
+	if (attr->disabled)
+		goto end;
+
+	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	if (!err)
+		perf_event_enable(bp);
+
+	if (err) {
+		bp->attr.bp_addr = old_addr;
+		bp->attr.bp_type = old_type;
+		bp->attr.bp_len = old_len;
+		if (!bp->attr.disabled)
+			perf_event_enable(bp);
+
+		return err;
+	}
+
+end:
+	bp->attr.disabled = attr->disabled;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 */
 struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered)
+			    perf_overflow_handler_t triggered)
 {
 	struct perf_event **cpu_events, **pevent, *bp;
 	long err;
......
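The rewritten modify_user_hw_breakpoint() never gives up its slot: it disables the event, swaps in the new addr/type/len, and re-enables only once arch validation passes, rolling back to the old triplet on failure. Callers therefore see plain int error semantics, roughly like this sketch (new_addr is illustrative):

	struct perf_event_attr attr = bp->attr;

	attr.bp_addr = new_addr;
	if (modify_user_hw_breakpoint(bp, &attr))
		/* bp is still registered, still armed with its old settings */
		printk(KERN_WARNING "could not retarget breakpoint\n");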
@@ -36,7 +36,7 @@
 /*
  * Each CPU has a list of per CPU events:
  */
-DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
+static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
 
 int perf_max_events __read_mostly = 1;
 static int perf_reserved_percpu __read_mostly;
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
 * is the current context on this CPU and preemption is disabled,
 * hence we can't get into perf_event_task_sched_out for this context.
 */
-static void perf_event_disable(struct perf_event *event)
+void perf_event_disable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
@@ -971,7 +971,7 @@ static void __perf_event_enable(void *info)
 * perf_event_for_each_child or perf_event_for_each as described
 * for perf_event_disable.
 */
-static void perf_event_enable(struct perf_event *event)
+void perf_event_enable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
@@ -1579,7 +1579,6 @@ static void
 __perf_event_init_context(struct perf_event_context *ctx,
 			  struct task_struct *task)
 {
-	memset(ctx, 0, sizeof(*ctx));
 	spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->group_list);
@@ -1654,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
 	}
 
 	if (!ctx) {
-		ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+		ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
 		err = -ENOMEM;
 		if (!ctx)
 			goto errout;
@@ -4011,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 	event->pmu->read(event);
 
 	data.addr = 0;
+	data.raw = NULL;
 	data.period = event->hw.last_period;
 	regs = get_irq_regs();
 	/*
@@ -4080,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
 	u64 now;
 
 	now = cpu_clock(cpu);
-	prev = atomic64_read(&event->hw.prev_count);
-	atomic64_set(&event->hw.prev_count, now);
+	prev = atomic64_xchg(&event->hw.prev_count, now);
 	atomic64_add(now - prev, &event->count);
 }
 
@@ -4286,15 +4285,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
 static const struct pmu *bp_perf_event_init(struct perf_event *bp)
 {
 	int err;
-	/*
-	 * The breakpoint is already filled if we haven't created the counter
-	 * through perf syscall
-	 * FIXME: manage to get trigerred to NULL if it comes from syscalls
-	 */
-	if (!bp->callback)
-		err = register_perf_hw_breakpoint(bp);
-	else
-		err = __register_perf_hw_breakpoint(bp);
+
+	err = register_perf_hw_breakpoint(bp);
 	if (err)
 		return ERR_PTR(err);
@@ -4308,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
 	struct perf_sample_data sample;
 	struct pt_regs *regs = data;
 
+	sample.raw = NULL;
 	sample.addr = bp->attr.bp_addr;
 
 	if (!perf_exclude_event(bp, regs))
@@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		   struct perf_event_context *ctx,
 		   struct perf_event *group_leader,
 		   struct perf_event *parent_event,
-		   perf_callback_t callback,
+		   perf_overflow_handler_t overflow_handler,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr,
 
 	event->state		= PERF_EVENT_STATE_INACTIVE;
 
-	if (!callback && parent_event)
-		callback = parent_event->callback;
+	if (!overflow_handler && parent_event)
+		overflow_handler = parent_event->overflow_handler;
 
-	event->callback	= callback;
+	event->overflow_handler	= overflow_handler;
 
 	if (attr->disabled)
 		event->state = PERF_EVENT_STATE_OFF;
@@ -4776,7 +4769,8 @@ SYSCALL_DEFINE5(perf_event_open,
 */
 struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
-				 pid_t pid, perf_callback_t callback)
+				 pid_t pid,
+				 perf_overflow_handler_t overflow_handler)
 {
 	struct perf_event *event;
 	struct perf_event_context *ctx;
@@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	}
 
 	event = perf_event_alloc(attr, cpu, ctx, NULL,
-				 NULL, callback, GFP_KERNEL);
+				 NULL, overflow_handler, GFP_KERNEL);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
 		goto err_put_context;
@@ -5090,7 +5084,7 @@ void perf_event_free_task(struct task_struct *task)
 */
 int perf_event_init_task(struct task_struct *child)
 {
-	struct perf_event_context *child_ctx, *parent_ctx;
+	struct perf_event_context *child_ctx = NULL, *parent_ctx;
 	struct perf_event_context *cloned_ctx;
 	struct perf_event *event;
 	struct task_struct *parent = current;
@@ -5105,20 +5099,6 @@ int perf_event_init_task(struct task_struct *child)
 	if (likely(!parent->perf_event_ctxp))
 		return 0;
 
-	/*
-	 * This is executed from the parent task context, so inherit
-	 * events that have been marked for cloning.
-	 * First allocate and initialize a context for the child.
-	 */
-
-	child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
-	if (!child_ctx)
-		return -ENOMEM;
-
-	__perf_event_init_context(child_ctx, child);
-	child->perf_event_ctxp = child_ctx;
-	get_task_struct(child);
-
 	/*
	 * If the parent's context is a clone, pin it so it won't get
	 * swapped under us.
@@ -5149,6 +5129,26 @@ int perf_event_init_task(struct task_struct *child)
 			continue;
 		}
 
+		if (!child->perf_event_ctxp) {
+			/*
+			 * This is executed from the parent task context, so
+			 * inherit events that have been marked for cloning.
+			 * First allocate and initialize a context for the
+			 * child.
+			 */
+
+			child_ctx = kzalloc(sizeof(struct perf_event_context),
+					    GFP_KERNEL);
+			if (!child_ctx) {
+				ret = -ENOMEM;
+				goto exit;
+			}
+
+			__perf_event_init_context(child_ctx, child);
+			child->perf_event_ctxp = child_ctx;
+			get_task_struct(child);
+		}
+
 		ret = inherit_group(event, parent, parent_ctx,
 				    child, child_ctx);
 		if (ret) {
@@ -5177,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child)
 		get_ctx(child_ctx->parent_ctx);
 	}
 
+exit:
 	mutex_unlock(&parent_ctx->mutex);
 	perf_unpin_context(parent_ctx);
......
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv)
 	 */
 	struct trace_probe *tp;
 	int i, ret = 0;
-	int is_return = 0;
+	int is_return = 0, is_delete = 0;
 	char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
 	unsigned long offset = 0;
 	void *addr = NULL;
 	char buf[MAX_EVENT_NAME_LEN];
 
-	if (argc < 2) {
-		pr_info("Probe point is not specified.\n");
-		return -EINVAL;
-	}
-
+	/* argc must be >= 1 */
 	if (argv[0][0] == 'p')
 		is_return = 0;
 	else if (argv[0][0] == 'r')
 		is_return = 1;
+	else if (argv[0][0] == '-')
+		is_delete = 1;
 	else {
-		pr_info("Probe definition must be started with 'p' or 'r'.\n");
+		pr_info("Probe definition must be started with 'p', 'r' or"
+			" '-'.\n");
 		return -EINVAL;
 	}
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv)
 			return -EINVAL;
 		}
 	}
+	if (!group)
+		group = KPROBE_EVENT_SYSTEM;
 
+	if (is_delete) {
+		if (!event) {
+			pr_info("Delete command needs an event name.\n");
+			return -EINVAL;
+		}
+		tp = find_probe_event(event, group);
+		if (!tp) {
+			pr_info("Event %s/%s doesn't exist.\n", group, event);
+			return -ENOENT;
+		}
+		/* delete an event */
+		unregister_trace_probe(tp);
+		free_trace_probe(tp);
+		return 0;
+	}
+
+	if (argc < 2) {
+		pr_info("Probe point is not specified.\n");
+		return -EINVAL;
+	}
 	if (isdigit(argv[1][0])) {
 		if (is_return) {
 			pr_info("Return probe point must be a symbol.\n");
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv)
 	argc -= 2; argv += 2;
 
 	/* setup a probe */
-	if (!group)
-		group = KPROBE_EVENT_SYSTEM;
 	if (!event) {
 		/* Make a new event name */
 		if (symbol)
@@ -1114,7 +1133,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
 	ret = trace_define_common_fields(event_call);
-	if (!ret)
+	if (ret)
 		return ret;
 
 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
@@ -1132,7 +1151,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
 	ret = trace_define_common_fields(event_call);
-	if (!ret)
+	if (ret)
 		return ret;
 
 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
......
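With is_delete wired into create_trace_probe(), a probe added through the kprobe_events file can be removed by name. Illustrative usage, assuming debugfs is mounted at /sys/kernel/debug and a made-up probe name:

	echo 'p:myprobe do_sys_open' >> /sys/kernel/debug/tracing/kprobe_events
	echo '-:myprobe'             >> /sys/kernel/debug/tracing/kprobe_events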
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 }
 #endif /* CONFIG_PROFILE_KSYM_TRACER */
 
-void ksym_hbp_handler(struct perf_event *hbp, void *data)
+void ksym_hbp_handler(struct perf_event *hbp, int nmi,
+		      struct perf_sample_data *data,
+		      struct pt_regs *regs)
 {
 	struct ring_buffer_event *event;
 	struct ksym_trace_entry *entry;
-	struct pt_regs *regs = data;
 	struct ring_buffer *buffer;
 	int pc;
......
@@ -41,7 +41,9 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
 MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
 			" write operations on the kernel symbol");
 
-static void sample_hbp_handler(struct perf_event *temp, void *data)
+static void sample_hbp_handler(struct perf_event *bp, int nmi,
+			       struct perf_sample_data *data,
+			       struct pt_regs *regs)
 {
 	printk(KERN_INFO "%s value is changed\n", ksym_name);
 	dump_stack();
@@ -51,8 +53,9 @@ static void sample_hbp_handler(struct perf_event *temp, void *data)
 static int __init hw_break_module_init(void)
 {
 	int ret;
-	DEFINE_BREAKPOINT_ATTR(attr);
+	struct perf_event_attr attr;
 
+	hw_breakpoint_init(&attr);
 	attr.bp_addr = kallsyms_lookup_name(ksym_name);
 	attr.bp_len = HW_BREAKPOINT_LEN_4;
 	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
......
@@ -8,16 +8,16 @@ perf-kmem - Tool to trace/measure kernel memory(slab) properties
 SYNOPSIS
 --------
 [verse]
-'perf kmem' {record} [<options>]
+'perf kmem' {record|stat} [<options>]
 
 DESCRIPTION
 -----------
-There's two variants of perf kmem:
+There are two variants of perf kmem:
 
   'perf kmem record <command>' to record the kmem events
   of an arbitrary workload.
 
-  'perf kmem' to report kernel memory statistics.
+  'perf kmem stat' to report kernel memory statistics.
 
 OPTIONS
 -------
@@ -25,8 +25,11 @@ OPTIONS
 --input=<file>::
 	Select the input file (default: perf.data)
 
---stat=<caller|alloc>::
-	Select per callsite or per allocation statistics
+--caller::
+	Show per-callsite statistics
+
+--alloc::
+	Show per-allocation statistics
 
 -s <key[,key2...]>::
 --sort=<key[,key2...]>::
......
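An illustrative session matching the updated synopsis (workload and sort key are made up):

	perf kmem record -- make -j4
	perf kmem stat --caller --sort=bytes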
@@ -8,10 +8,13 @@ perf-probe - Define new dynamic tracepoints
 SYNOPSIS
 --------
 [verse]
-'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...]
+'perf probe' [options] --add='PROBE' [...]
 or
-'perf probe' [options] 'PROBE' ['PROBE' ...]
+'perf probe' [options] PROBE
+or
+'perf probe' [options] --del='[GROUP:]EVENT' [...]
+or
+'perf probe' --list
 
 DESCRIPTION
 -----------
@@ -31,8 +34,16 @@ OPTIONS
 	Be more verbose (show parsed arguments, etc).
 
 -a::
---add::
-	Define a probe point (see PROBE SYNTAX for detail)
+--add=::
+	Define a probe event (see PROBE SYNTAX for detail).
+
+-d::
+--del=::
+	Delete a probe event.
+
+-l::
+--list::
+	List up current probe events.
 
 PROBE SYNTAX
 ------------
......
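Illustrative usage of the new options (the probed function is just an example):

	perf probe --add='do_sys_open'
	perf probe --list
	perf probe --del='do_sys_open'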
@@ -19,7 +19,7 @@ static char const *input_name = "perf.data";
 static int force;
 
 static const char *const buildid_list_usage[] = {
-	"perf report [<options>]",
+	"perf buildid-list [<options>]",
 	NULL
 };
......
@@ -57,11 +57,6 @@ static struct rb_root root_caller_sorted;
 static unsigned long total_requested, total_allocated;
 static unsigned long nr_allocs, nr_cross_allocs;
 
-struct raw_event_sample {
-	u32 size;
-	char data[0];
-};
-
 #define PATH_SYS_NODE	"/sys/devices/system/node"
 
 static void init_cpunode_map(void)
@@ -201,7 +196,7 @@ static void insert_caller_stat(unsigned long call_site,
 	}
 }
 
-static void process_alloc_event(struct raw_event_sample *raw,
+static void process_alloc_event(void *data,
 				struct event *event,
 				int cpu,
 				u64 timestamp __used,
@@ -214,10 +209,10 @@ static void process_alloc_event(struct raw_event_sample *raw,
 	int bytes_alloc;
 	int node1, node2;
 
-	ptr = raw_field_value(event, "ptr", raw->data);
-	call_site = raw_field_value(event, "call_site", raw->data);
-	bytes_req = raw_field_value(event, "bytes_req", raw->data);
-	bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
+	ptr = raw_field_value(event, "ptr", data);
+	call_site = raw_field_value(event, "call_site", data);
+	bytes_req = raw_field_value(event, "bytes_req", data);
+	bytes_alloc = raw_field_value(event, "bytes_alloc", data);
 
 	insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
 	insert_caller_stat(call_site, bytes_req, bytes_alloc);
@@ -227,7 +222,7 @@ static void process_alloc_event(struct raw_event_sample *raw,
 	if (node) {
 		node1 = cpunode_map[cpu];
-		node2 = raw_field_value(event, "node", raw->data);
+		node2 = raw_field_value(event, "node", data);
 		if (node1 != node2)
 			nr_cross_allocs++;
 	}
@@ -262,7 +257,7 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
 	return NULL;
 }
 
-static void process_free_event(struct raw_event_sample *raw,
+static void process_free_event(void *data,
 			       struct event *event,
 			       int cpu,
 			       u64 timestamp __used,
@@ -271,7 +266,7 @@ static void process_free_event(struct raw_event_sample *raw,
 	unsigned long ptr;
 	struct alloc_stat *s_alloc, *s_caller;
 
-	ptr = raw_field_value(event, "ptr", raw->data);
+	ptr = raw_field_value(event, "ptr", data);
 
 	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
 	if (!s_alloc)
@@ -289,66 +284,53 @@ static void process_free_event(struct raw_event_sample *raw,
 }
 
 static void
-process_raw_event(event_t *raw_event __used, void *more_data,
+process_raw_event(event_t *raw_event __used, void *data,
 		  int cpu, u64 timestamp, struct thread *thread)
 {
-	struct raw_event_sample *raw = more_data;
 	struct event *event;
 	int type;
 
-	type = trace_parse_common_type(raw->data);
+	type = trace_parse_common_type(data);
 	event = trace_find_event(type);
 
 	if (!strcmp(event->name, "kmalloc") ||
 	    !strcmp(event->name, "kmem_cache_alloc")) {
-		process_alloc_event(raw, event, cpu, timestamp, thread, 0);
+		process_alloc_event(data, event, cpu, timestamp, thread, 0);
 		return;
 	}
 
 	if (!strcmp(event->name, "kmalloc_node") ||
 	    !strcmp(event->name, "kmem_cache_alloc_node")) {
-		process_alloc_event(raw, event, cpu, timestamp, thread, 1);
+		process_alloc_event(data, event, cpu, timestamp, thread, 1);
 		return;
 	}
 
 	if (!strcmp(event->name, "kfree") ||
 	    !strcmp(event->name, "kmem_cache_free")) {
-		process_free_event(raw, event, cpu, timestamp, thread);
+		process_free_event(data, event, cpu, timestamp, thread);
 		return;
 	}
 }
 
 static int process_sample_event(event_t *event)
 {
-	u64 ip = event->ip.ip;
-	u64 timestamp = -1;
-	u32 cpu = -1;
-	u64 period = 1;
-	void *more_data = event->ip.__more_data;
-	struct thread *thread = threads__findnew(event->ip.pid);
+	struct sample_data data;
+	struct thread *thread;
 
-	if (sample_type & PERF_SAMPLE_TIME) {
-		timestamp = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	memset(&data, 0, sizeof(data));
+	data.time = -1;
+	data.cpu = -1;
+	data.period = 1;
 
-	if (sample_type & PERF_SAMPLE_CPU) {
-		cpu = *(u32 *)more_data;
-		more_data += sizeof(u32);
-		more_data += sizeof(u32); /* reserved */
-	}
-
-	if (sample_type & PERF_SAMPLE_PERIOD) {
-		period = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	event__parse_sample(event, sample_type, &data);
 
 	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
-		event->ip.pid, event->ip.tid,
-		(void *)(long)ip,
-		(long long)period);
+		data.pid, data.tid,
+		(void *)(long)data.ip,
+		(long long)data.period);
 
+	thread = threads__findnew(event->ip.pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -357,7 +339,8 @@ static int process_sample_event(event_t *event)
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	process_raw_event(event, more_data, cpu, timestamp, thread);
+	process_raw_event(event, data.raw_data, data.cpu,
+			  data.time, thread);
 
 	return 0;
 }
@@ -543,7 +526,7 @@ static int __cmd_kmem(void)
} }
static const char * const kmem_usage[] = { static const char * const kmem_usage[] = {
"perf kmem [<options>] {record}", "perf kmem [<options>] {record|stat}",
NULL NULL
}; };
...@@ -703,18 +686,17 @@ static int parse_sort_opt(const struct option *opt __used, ...@@ -703,18 +686,17 @@ static int parse_sort_opt(const struct option *opt __used,
return 0; return 0;
} }
static int parse_stat_opt(const struct option *opt __used, static int parse_caller_opt(const struct option *opt __used,
const char *arg, int unset __used) const char *arg __used, int unset __used)
{ {
if (!arg) caller_flag = (alloc_flag + 1);
return -1; return 0;
}
if (strcmp(arg, "alloc") == 0) static int parse_alloc_opt(const struct option *opt __used,
alloc_flag = (caller_flag + 1); const char *arg __used, int unset __used)
else if (strcmp(arg, "caller") == 0) {
caller_flag = (alloc_flag + 1); alloc_flag = (caller_flag + 1);
else
return -1;
return 0; return 0;
} }
...@@ -739,14 +721,17 @@ static int parse_line_opt(const struct option *opt __used, ...@@ -739,14 +721,17 @@ static int parse_line_opt(const struct option *opt __used,
static const struct option kmem_options[] = { static const struct option kmem_options[] = {
OPT_STRING('i', "input", &input_name, "file", OPT_STRING('i', "input", &input_name, "file",
"input file name"), "input file name"),
OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>", OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
"stat selector, Pass 'alloc' or 'caller'.", "show per-callsite statistics",
parse_stat_opt), parse_caller_opt),
OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
"show per-allocation statistics",
parse_alloc_opt),
OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
"sort by keys: ptr, call_site, bytes, hit, pingpong, frag", "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
parse_sort_opt), parse_sort_opt),
OPT_CALLBACK('l', "line", NULL, "num", OPT_CALLBACK('l', "line", NULL, "num",
"show n lins", "show n lines",
parse_line_opt), parse_line_opt),
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END() OPT_END()
...@@ -790,18 +775,22 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used) ...@@ -790,18 +775,22 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
if (argc && !strncmp(argv[0], "rec", 3)) if (!argc)
return __cmd_record(argc, argv);
else if (argc)
usage_with_options(kmem_usage, kmem_options); usage_with_options(kmem_usage, kmem_options);
if (list_empty(&caller_sort)) if (!strncmp(argv[0], "rec", 3)) {
setup_sorting(&caller_sort, default_sort_order); return __cmd_record(argc, argv);
if (list_empty(&alloc_sort)) } else if (!strcmp(argv[0], "stat")) {
setup_sorting(&alloc_sort, default_sort_order); setup_cpunode_map();
if (list_empty(&caller_sort))
setup_sorting(&caller_sort, default_sort_order);
if (list_empty(&alloc_sort))
setup_sorting(&alloc_sort, default_sort_order);
setup_cpunode_map(); return __cmd_kmem();
}
return __cmd_kmem(); return 0;
} }
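With recording and reporting now split into explicit subcommands, a hypothetical session might look like the following (the workload and line count are illustrative, not from this patch):

        # perf kmem record sleep 3
        # perf kmem --caller --alloc -l 20 stat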
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "perf.h" #include "perf.h"
#include "builtin.h" #include "builtin.h"
#include "util/util.h" #include "util/util.h"
#include "util/strlist.h"
#include "util/event.h" #include "util/event.h"
#include "util/debug.h" #include "util/debug.h"
#include "util/parse-options.h" #include "util/parse-options.h"
...@@ -43,11 +44,12 @@ ...@@ -43,11 +44,12 @@
#include "util/probe-event.h" #include "util/probe-event.h"
/* Default vmlinux search paths */ /* Default vmlinux search paths */
#define NR_SEARCH_PATH 3 #define NR_SEARCH_PATH 4
const char *default_search_path[NR_SEARCH_PATH] = { const char *default_search_path[NR_SEARCH_PATH] = {
"/lib/modules/%s/build/vmlinux", /* Custom build kernel */ "/lib/modules/%s/build/vmlinux", /* Custom build kernel */
"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */ "/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */
"/boot/vmlinux-debug-%s", /* Ubuntu */ "/boot/vmlinux-debug-%s", /* Ubuntu */
"./vmlinux", /* CWD */
}; };
#define MAX_PATH_LEN 256 #define MAX_PATH_LEN 256
...@@ -60,6 +62,7 @@ static struct { ...@@ -60,6 +62,7 @@ static struct {
int need_dwarf; int need_dwarf;
int nr_probe; int nr_probe;
struct probe_point probes[MAX_PROBES]; struct probe_point probes[MAX_PROBES];
struct strlist *dellist;
} session; } session;
static bool listing; static bool listing;
...@@ -79,6 +82,25 @@ static void parse_probe_event(const char *str) ...@@ -79,6 +82,25 @@ static void parse_probe_event(const char *str)
pr_debug("%d arguments\n", pp->nr_args); pr_debug("%d arguments\n", pp->nr_args);
} }
static void parse_probe_event_argv(int argc, const char **argv)
{
int i, len;
char *buf;
/* Bind up rest arguments */
len = 0;
for (i = 0; i < argc; i++)
len += strlen(argv[i]) + 1;
buf = zalloc(len + 1);
if (!buf)
die("Failed to allocate memory for binding arguments.");
len = 0;
for (i = 0; i < argc; i++)
len += sprintf(&buf[len], "%s ", argv[i]);
parse_probe_event(buf);
free(buf);
}
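The helper above binds all remaining command-line words into a single probe definition string. A minimal standalone sketch of the same joining logic, with perf's zalloc()/die() replaced by plain libc calls for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Join argv words with single spaces, as parse_probe_event_argv() does. */
static char *bind_argv(int argc, const char **argv)
{
        int i, len = 0;
        char *buf;

        for (i = 0; i < argc; i++)
                len += strlen(argv[i]) + 1;     /* word + trailing space */
        buf = calloc(1, len + 1);
        if (!buf)
                exit(1);
        len = 0;
        for (i = 0; i < argc; i++)
                len += sprintf(&buf[len], "%s ", argv[i]);
        return buf;
}

int main(void)
{
        const char *words[] = { "schedule+10", "prev", "next" };
        char *def = bind_argv(3, words);

        printf("probe definition: '%s'\n", def);  /* "schedule+10 prev next " */
        free(def);
        return 0;
}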
static int opt_add_probe_event(const struct option *opt __used, static int opt_add_probe_event(const struct option *opt __used,
const char *str, int unset __used) const char *str, int unset __used)
{ {
...@@ -87,6 +109,17 @@ static int opt_add_probe_event(const struct option *opt __used, ...@@ -87,6 +109,17 @@ static int opt_add_probe_event(const struct option *opt __used,
return 0; return 0;
} }
static int opt_del_probe_event(const struct option *opt __used,
const char *str, int unset __used)
{
if (str) {
if (!session.dellist)
session.dellist = strlist__new(true, NULL);
strlist__add(session.dellist, str);
}
return 0;
}
#ifndef NO_LIBDWARF #ifndef NO_LIBDWARF
static int open_default_vmlinux(void) static int open_default_vmlinux(void)
{ {
...@@ -121,6 +154,7 @@ static int open_default_vmlinux(void) ...@@ -121,6 +154,7 @@ static int open_default_vmlinux(void)
static const char * const probe_usage[] = { static const char * const probe_usage[] = {
"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list", "perf probe --list",
NULL NULL
}; };
...@@ -132,7 +166,9 @@ static const struct option options[] = { ...@@ -132,7 +166,9 @@ static const struct option options[] = {
OPT_STRING('k', "vmlinux", &session.vmlinux, "file", OPT_STRING('k', "vmlinux", &session.vmlinux, "file",
"vmlinux/module pathname"), "vmlinux/module pathname"),
#endif #endif
OPT_BOOLEAN('l', "list", &listing, "list up current probes"), OPT_BOOLEAN('l', "list", &listing, "list up current probe events"),
OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
opt_del_probe_event),
OPT_CALLBACK('a', "add", NULL, OPT_CALLBACK('a', "add", NULL,
#ifdef NO_LIBDWARF #ifdef NO_LIBDWARF
"FUNC[+OFFS|%return] [ARG ...]", "FUNC[+OFFS|%return] [ARG ...]",
...@@ -160,7 +196,7 @@ static const struct option options[] = { ...@@ -160,7 +196,7 @@ static const struct option options[] = {
int cmd_probe(int argc, const char **argv, const char *prefix __used) int cmd_probe(int argc, const char **argv, const char *prefix __used)
{ {
int i, j, ret; int i, ret;
#ifndef NO_LIBDWARF #ifndef NO_LIBDWARF
int fd; int fd;
#endif #endif
...@@ -168,40 +204,52 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ...@@ -168,40 +204,52 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
argc = parse_options(argc, argv, options, probe_usage, argc = parse_options(argc, argv, options, probe_usage,
PARSE_OPT_STOP_AT_NON_OPTION); PARSE_OPT_STOP_AT_NON_OPTION);
for (i = 0; i < argc; i++) if (argc > 0)
parse_probe_event(argv[i]); parse_probe_event_argv(argc, argv);
if ((session.nr_probe == 0 && !listing) || if ((session.nr_probe == 0 && !session.dellist && !listing))
(session.nr_probe != 0 && listing))
usage_with_options(probe_usage, options); usage_with_options(probe_usage, options);
if (listing) { if (listing) {
if (session.nr_probe != 0 || session.dellist) {
pr_warning(" Error: Don't use --list with"
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
show_perf_probe_events(); show_perf_probe_events();
return 0; return 0;
} }
if (session.dellist) {
del_trace_kprobe_events(session.dellist);
strlist__delete(session.dellist);
if (session.nr_probe == 0)
return 0;
}
if (session.need_dwarf) if (session.need_dwarf)
#ifdef NO_LIBDWARF #ifdef NO_LIBDWARF
die("Debuginfo-analysis is not supported"); die("Debuginfo-analysis is not supported");
#else /* !NO_LIBDWARF */ #else /* !NO_LIBDWARF */
pr_debug("Some probes require debuginfo.\n"); pr_debug("Some probes require debuginfo.\n");
if (session.vmlinux) if (session.vmlinux) {
pr_debug("Try to open %s.", session.vmlinux);
fd = open(session.vmlinux, O_RDONLY); fd = open(session.vmlinux, O_RDONLY);
else } else
fd = open_default_vmlinux(); fd = open_default_vmlinux();
if (fd < 0) { if (fd < 0) {
if (session.need_dwarf) if (session.need_dwarf)
die("Could not open vmlinux/module file."); die("Could not open debuginfo file.");
pr_warning("Could not open vmlinux/module file." pr_debug("Could not open vmlinux/module file."
" Try to use symbols.\n"); " Try to use symbols.\n");
goto end_dwarf; goto end_dwarf;
} }
/* Searching probe points */ /* Searching probe points */
for (j = 0; j < session.nr_probe; j++) { for (i = 0; i < session.nr_probe; i++) {
pp = &session.probes[j]; pp = &session.probes[i];
if (pp->found) if (pp->found)
continue; continue;
...@@ -223,8 +271,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ...@@ -223,8 +271,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
#endif /* !NO_LIBDWARF */ #endif /* !NO_LIBDWARF */
/* Synthesize probes without dwarf */ /* Synthesize probes without dwarf */
for (j = 0; j < session.nr_probe; j++) { for (i = 0; i < session.nr_probe; i++) {
pp = &session.probes[j]; pp = &session.probes[i];
if (pp->found) /* This probe is already found. */ if (pp->found) /* This probe is already found. */
continue; continue;
......
...@@ -605,44 +605,41 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) ...@@ -605,44 +605,41 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
static int process_sample_event(event_t *event) static int process_sample_event(event_t *event)
{ {
u64 ip = event->ip.ip; struct sample_data data;
u64 period = 1;
void *more_data = event->ip.__more_data;
struct ip_callchain *chain = NULL;
int cpumode; int cpumode;
struct addr_location al; struct addr_location al;
struct thread *thread = threads__findnew(event->ip.pid); struct thread *thread;
if (sample_type & PERF_SAMPLE_PERIOD) { memset(&data, 0, sizeof(data));
period = *(u64 *)more_data; data.period = 1;
more_data += sizeof(u64);
} event__parse_sample(event, sample_type, &data);
dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
event->header.misc, event->header.misc,
event->ip.pid, event->ip.tid, data.pid, data.tid,
(void *)(long)ip, (void *)(long)data.ip,
(long long)period); (long long)data.period);
if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (sample_type & PERF_SAMPLE_CALLCHAIN) {
unsigned int i; unsigned int i;
chain = (void *)more_data; dump_printf("... chain: nr:%Lu\n", data.callchain->nr);
dump_printf("... chain: nr:%Lu\n", chain->nr);
if (validate_chain(chain, event) < 0) { if (validate_chain(data.callchain, event) < 0) {
pr_debug("call-chain problem with event, " pr_debug("call-chain problem with event, "
"skipping it.\n"); "skipping it.\n");
return 0; return 0;
} }
if (dump_trace) { if (dump_trace) {
for (i = 0; i < chain->nr; i++) for (i = 0; i < data.callchain->nr; i++)
dump_printf("..... %2d: %016Lx\n", i, chain->ips[i]); dump_printf("..... %2d: %016Lx\n",
i, data.callchain->ips[i]);
} }
} }
thread = threads__findnew(data.pid);
if (thread == NULL) { if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n", pr_debug("problem processing %d event, skipping it.\n",
event->header.type); event->header.type);
...@@ -657,7 +654,7 @@ static int process_sample_event(event_t *event) ...@@ -657,7 +654,7 @@ static int process_sample_event(event_t *event)
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
thread__find_addr_location(thread, cpumode, thread__find_addr_location(thread, cpumode,
MAP__FUNCTION, ip, &al, NULL); MAP__FUNCTION, data.ip, &al, NULL);
/* /*
* We have to do this here as we may have a dso with no symbol hit that * We have to do this here as we may have a dso with no symbol hit that
* has a name longer than the ones with symbols sampled. * has a name longer than the ones with symbols sampled.
...@@ -675,12 +672,12 @@ static int process_sample_event(event_t *event) ...@@ -675,12 +672,12 @@ static int process_sample_event(event_t *event)
if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name)) if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name))
return 0; return 0;
if (hist_entry__add(&al, chain, period)) { if (hist_entry__add(&al, data.callchain, data.period)) {
pr_debug("problem incrementing symbol count, skipping event\n"); pr_debug("problem incrementing symbol count, skipping event\n");
return -1; return -1;
} }
event__stats.total += period; event__stats.total += data.period;
return 0; return 0;
} }
......
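The callchain handling above relies on the variable-length layout that event__parse_sample() exposes through data.callchain: a leading count followed by that many instruction pointers, sitting directly inside the sample's u64 array. A minimal sketch of walking such a chain (the payload here is fabricated for illustration):

#include <stdio.h>
#include <stdint.h>

/* Same shape perf uses: nr entries followed by nr instruction pointers. */
struct ip_callchain {
        uint64_t nr;
        uint64_t ips[];
};

int main(void)
{
        /* Fabricated sample payload: nr = 3, then three fake IPs. */
        uint64_t payload[] = { 3, 0xffffffff81000010ULL,
                               0xffffffff81000020ULL, 0x400123ULL };
        struct ip_callchain *chain = (struct ip_callchain *)payload;
        uint64_t i;

        printf("... chain: nr:%llu\n", (unsigned long long)chain->nr);
        for (i = 0; i < chain->nr; i++)
                printf("..... %2llu: %016llx\n", (unsigned long long)i,
                       (unsigned long long)chain->ips[i]);
        return 0;
}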
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include "util/debug.h" #include "util/debug.h"
#include "util/data_map.h" #include "util/data_map.h"
#include <sys/types.h>
#include <sys/prctl.h> #include <sys/prctl.h>
#include <semaphore.h> #include <semaphore.h>
...@@ -141,6 +140,7 @@ struct work_atoms { ...@@ -141,6 +140,7 @@ struct work_atoms {
struct thread *thread; struct thread *thread;
struct rb_node node; struct rb_node node;
u64 max_lat; u64 max_lat;
u64 max_lat_at;
u64 total_lat; u64 total_lat;
u64 nb_atoms; u64 nb_atoms;
u64 total_runtime; u64 total_runtime;
...@@ -414,34 +414,33 @@ static u64 get_cpu_usage_nsec_parent(void) ...@@ -414,34 +414,33 @@ static u64 get_cpu_usage_nsec_parent(void)
return sum; return sum;
} }
static u64 get_cpu_usage_nsec_self(void) static int self_open_counters(void)
{ {
char filename [] = "/proc/1234567890/sched"; struct perf_event_attr attr;
unsigned long msecs, nsecs; int fd;
char *line = NULL;
u64 total = 0;
size_t len = 0;
ssize_t chars;
FILE *file;
int ret;
sprintf(filename, "/proc/%d/sched", getpid()); memset(&attr, 0, sizeof(attr));
file = fopen(filename, "r");
BUG_ON(!file);
while ((chars = getline(&line, &len, file)) != -1) { attr.type = PERF_TYPE_SOFTWARE;
ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n", attr.config = PERF_COUNT_SW_TASK_CLOCK;
&msecs, &nsecs);
if (ret == 2) { fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
total = msecs*1e6 + nsecs;
break;
}
}
if (line)
free(line);
fclose(file);
return total; if (fd < 0)
die("Error: sys_perf_event_open() syscall returned"
"with %d (%s)\n", fd, strerror(errno));
return fd;
}
static u64 get_cpu_usage_nsec_self(int fd)
{
u64 runtime;
int ret;
ret = read(fd, &runtime, sizeof(runtime));
BUG_ON(ret != sizeof(runtime));
return runtime;
} }
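self_open_counters() replaces /proc/<pid>/sched text parsing with a per-thread software counter, so the replay threads measure their own CPU time with one read() per checkpoint. A standalone sketch of the same pattern using the raw syscall; perf has its own sys_perf_event_open() wrapper, so the hand-rolled wrapper below is an assumption for illustration and needs a Linux system with perf_event support:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Open a self-monitoring task-clock counter: pid 0 (this task), any CPU. */
static int task_clock_fd(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_TASK_CLOCK;

        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
        int fd = task_clock_fd();
        uint64_t before, after;
        volatile unsigned long i, sink = 0;

        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        read(fd, &before, sizeof(before));
        for (i = 0; i < 10000000; i++)          /* burn some CPU */
                sink += i;
        read(fd, &after, sizeof(after));
        printf("consumed %llu ns of task time\n",
               (unsigned long long)(after - before));
        close(fd);
        return 0;
}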
static void *thread_func(void *ctx) static void *thread_func(void *ctx)
...@@ -450,9 +449,11 @@ static void *thread_func(void *ctx) ...@@ -450,9 +449,11 @@ static void *thread_func(void *ctx)
u64 cpu_usage_0, cpu_usage_1; u64 cpu_usage_0, cpu_usage_1;
unsigned long i, ret; unsigned long i, ret;
char comm2[22]; char comm2[22];
int fd;
sprintf(comm2, ":%s", this_task->comm); sprintf(comm2, ":%s", this_task->comm);
prctl(PR_SET_NAME, comm2); prctl(PR_SET_NAME, comm2);
fd = self_open_counters();
again: again:
ret = sem_post(&this_task->ready_for_work); ret = sem_post(&this_task->ready_for_work);
...@@ -462,16 +463,15 @@ static void *thread_func(void *ctx) ...@@ -462,16 +463,15 @@ static void *thread_func(void *ctx)
ret = pthread_mutex_unlock(&start_work_mutex); ret = pthread_mutex_unlock(&start_work_mutex);
BUG_ON(ret); BUG_ON(ret);
cpu_usage_0 = get_cpu_usage_nsec_self(); cpu_usage_0 = get_cpu_usage_nsec_self(fd);
for (i = 0; i < this_task->nr_events; i++) { for (i = 0; i < this_task->nr_events; i++) {
this_task->curr_event = i; this_task->curr_event = i;
process_sched_event(this_task, this_task->atoms[i]); process_sched_event(this_task, this_task->atoms[i]);
} }
cpu_usage_1 = get_cpu_usage_nsec_self(); cpu_usage_1 = get_cpu_usage_nsec_self(fd);
this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
ret = sem_post(&this_task->work_done_sem); ret = sem_post(&this_task->work_done_sem);
BUG_ON(ret); BUG_ON(ret);
...@@ -628,11 +628,6 @@ static void test_calibrations(void) ...@@ -628,11 +628,6 @@ static void test_calibrations(void)
printf("the sleep test took %Ld nsecs\n", T1-T0); printf("the sleep test took %Ld nsecs\n", T1-T0);
} }
struct raw_event_sample {
u32 size;
char data[0];
};
#define FILL_FIELD(ptr, field, event, data) \ #define FILL_FIELD(ptr, field, event, data) \
ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data) ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)
...@@ -1019,8 +1014,10 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) ...@@ -1019,8 +1014,10 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
delta = atom->sched_in_time - atom->wake_up_time; delta = atom->sched_in_time - atom->wake_up_time;
atoms->total_lat += delta; atoms->total_lat += delta;
if (delta > atoms->max_lat) if (delta > atoms->max_lat) {
atoms->max_lat = delta; atoms->max_lat = delta;
atoms->max_lat_at = timestamp;
}
atoms->nb_atoms++; atoms->nb_atoms++;
} }
...@@ -1216,10 +1213,11 @@ static void output_lat_thread(struct work_atoms *work_list) ...@@ -1216,10 +1213,11 @@ static void output_lat_thread(struct work_atoms *work_list)
avg = work_list->total_lat / work_list->nb_atoms; avg = work_list->total_lat / work_list->nb_atoms;
printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n", printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
(double)work_list->total_runtime / 1e6, (double)work_list->total_runtime / 1e6,
work_list->nb_atoms, (double)avg / 1e6, work_list->nb_atoms, (double)avg / 1e6,
(double)work_list->max_lat / 1e6); (double)work_list->max_lat / 1e6,
(double)work_list->max_lat_at / 1e9);
} }
static int pid_cmp(struct work_atoms *l, struct work_atoms *r) static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
...@@ -1356,7 +1354,7 @@ static void sort_lat(void) ...@@ -1356,7 +1354,7 @@ static void sort_lat(void)
static struct trace_sched_handler *trace_handler; static struct trace_sched_handler *trace_handler;
static void static void
process_sched_wakeup_event(struct raw_event_sample *raw, process_sched_wakeup_event(void *data,
struct event *event, struct event *event,
int cpu __used, int cpu __used,
u64 timestamp __used, u64 timestamp __used,
...@@ -1364,13 +1362,13 @@ process_sched_wakeup_event(struct raw_event_sample *raw, ...@@ -1364,13 +1362,13 @@ process_sched_wakeup_event(struct raw_event_sample *raw,
{ {
struct trace_wakeup_event wakeup_event; struct trace_wakeup_event wakeup_event;
FILL_COMMON_FIELDS(wakeup_event, event, raw->data); FILL_COMMON_FIELDS(wakeup_event, event, data);
FILL_ARRAY(wakeup_event, comm, event, raw->data); FILL_ARRAY(wakeup_event, comm, event, data);
FILL_FIELD(wakeup_event, pid, event, raw->data); FILL_FIELD(wakeup_event, pid, event, data);
FILL_FIELD(wakeup_event, prio, event, raw->data); FILL_FIELD(wakeup_event, prio, event, data);
FILL_FIELD(wakeup_event, success, event, raw->data); FILL_FIELD(wakeup_event, success, event, data);
FILL_FIELD(wakeup_event, cpu, event, raw->data); FILL_FIELD(wakeup_event, cpu, event, data);
if (trace_handler->wakeup_event) if (trace_handler->wakeup_event)
trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread); trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
...@@ -1469,7 +1467,7 @@ map_switch_event(struct trace_switch_event *switch_event, ...@@ -1469,7 +1467,7 @@ map_switch_event(struct trace_switch_event *switch_event,
static void static void
process_sched_switch_event(struct raw_event_sample *raw, process_sched_switch_event(void *data,
struct event *event, struct event *event,
int this_cpu, int this_cpu,
u64 timestamp __used, u64 timestamp __used,
...@@ -1477,15 +1475,15 @@ process_sched_switch_event(struct raw_event_sample *raw, ...@@ -1477,15 +1475,15 @@ process_sched_switch_event(struct raw_event_sample *raw,
{ {
struct trace_switch_event switch_event; struct trace_switch_event switch_event;
FILL_COMMON_FIELDS(switch_event, event, raw->data); FILL_COMMON_FIELDS(switch_event, event, data);
FILL_ARRAY(switch_event, prev_comm, event, raw->data); FILL_ARRAY(switch_event, prev_comm, event, data);
FILL_FIELD(switch_event, prev_pid, event, raw->data); FILL_FIELD(switch_event, prev_pid, event, data);
FILL_FIELD(switch_event, prev_prio, event, raw->data); FILL_FIELD(switch_event, prev_prio, event, data);
FILL_FIELD(switch_event, prev_state, event, raw->data); FILL_FIELD(switch_event, prev_state, event, data);
FILL_ARRAY(switch_event, next_comm, event, raw->data); FILL_ARRAY(switch_event, next_comm, event, data);
FILL_FIELD(switch_event, next_pid, event, raw->data); FILL_FIELD(switch_event, next_pid, event, data);
FILL_FIELD(switch_event, next_prio, event, raw->data); FILL_FIELD(switch_event, next_prio, event, data);
if (curr_pid[this_cpu] != (u32)-1) { if (curr_pid[this_cpu] != (u32)-1) {
/* /*
...@@ -1502,7 +1500,7 @@ process_sched_switch_event(struct raw_event_sample *raw, ...@@ -1502,7 +1500,7 @@ process_sched_switch_event(struct raw_event_sample *raw,
} }
static void static void
process_sched_runtime_event(struct raw_event_sample *raw, process_sched_runtime_event(void *data,
struct event *event, struct event *event,
int cpu __used, int cpu __used,
u64 timestamp __used, u64 timestamp __used,
...@@ -1510,17 +1508,17 @@ process_sched_runtime_event(struct raw_event_sample *raw, ...@@ -1510,17 +1508,17 @@ process_sched_runtime_event(struct raw_event_sample *raw,
{ {
struct trace_runtime_event runtime_event; struct trace_runtime_event runtime_event;
FILL_ARRAY(runtime_event, comm, event, raw->data); FILL_ARRAY(runtime_event, comm, event, data);
FILL_FIELD(runtime_event, pid, event, raw->data); FILL_FIELD(runtime_event, pid, event, data);
FILL_FIELD(runtime_event, runtime, event, raw->data); FILL_FIELD(runtime_event, runtime, event, data);
FILL_FIELD(runtime_event, vruntime, event, raw->data); FILL_FIELD(runtime_event, vruntime, event, data);
if (trace_handler->runtime_event) if (trace_handler->runtime_event)
trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread); trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
} }
static void static void
process_sched_fork_event(struct raw_event_sample *raw, process_sched_fork_event(void *data,
struct event *event, struct event *event,
int cpu __used, int cpu __used,
u64 timestamp __used, u64 timestamp __used,
...@@ -1528,12 +1526,12 @@ process_sched_fork_event(struct raw_event_sample *raw, ...@@ -1528,12 +1526,12 @@ process_sched_fork_event(struct raw_event_sample *raw,
{ {
struct trace_fork_event fork_event; struct trace_fork_event fork_event;
FILL_COMMON_FIELDS(fork_event, event, raw->data); FILL_COMMON_FIELDS(fork_event, event, data);
FILL_ARRAY(fork_event, parent_comm, event, raw->data); FILL_ARRAY(fork_event, parent_comm, event, data);
FILL_FIELD(fork_event, parent_pid, event, raw->data); FILL_FIELD(fork_event, parent_pid, event, data);
FILL_ARRAY(fork_event, child_comm, event, raw->data); FILL_ARRAY(fork_event, child_comm, event, data);
FILL_FIELD(fork_event, child_pid, event, raw->data); FILL_FIELD(fork_event, child_pid, event, data);
if (trace_handler->fork_event) if (trace_handler->fork_event)
trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread); trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
...@@ -1550,7 +1548,7 @@ process_sched_exit_event(struct event *event, ...@@ -1550,7 +1548,7 @@ process_sched_exit_event(struct event *event,
} }
static void static void
process_sched_migrate_task_event(struct raw_event_sample *raw, process_sched_migrate_task_event(void *data,
struct event *event, struct event *event,
int cpu __used, int cpu __used,
u64 timestamp __used, u64 timestamp __used,
...@@ -1558,80 +1556,66 @@ process_sched_migrate_task_event(struct raw_event_sample *raw, ...@@ -1558,80 +1556,66 @@ process_sched_migrate_task_event(struct raw_event_sample *raw,
{ {
struct trace_migrate_task_event migrate_task_event; struct trace_migrate_task_event migrate_task_event;
FILL_COMMON_FIELDS(migrate_task_event, event, raw->data); FILL_COMMON_FIELDS(migrate_task_event, event, data);
FILL_ARRAY(migrate_task_event, comm, event, raw->data); FILL_ARRAY(migrate_task_event, comm, event, data);
FILL_FIELD(migrate_task_event, pid, event, raw->data); FILL_FIELD(migrate_task_event, pid, event, data);
FILL_FIELD(migrate_task_event, prio, event, raw->data); FILL_FIELD(migrate_task_event, prio, event, data);
FILL_FIELD(migrate_task_event, cpu, event, raw->data); FILL_FIELD(migrate_task_event, cpu, event, data);
if (trace_handler->migrate_task_event) if (trace_handler->migrate_task_event)
trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread); trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
} }
static void static void
process_raw_event(event_t *raw_event __used, void *more_data, process_raw_event(event_t *raw_event __used, void *data,
int cpu, u64 timestamp, struct thread *thread) int cpu, u64 timestamp, struct thread *thread)
{ {
struct raw_event_sample *raw = more_data;
struct event *event; struct event *event;
int type; int type;
type = trace_parse_common_type(raw->data); type = trace_parse_common_type(data);
event = trace_find_event(type); event = trace_find_event(type);
if (!strcmp(event->name, "sched_switch")) if (!strcmp(event->name, "sched_switch"))
process_sched_switch_event(raw, event, cpu, timestamp, thread); process_sched_switch_event(data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_stat_runtime")) if (!strcmp(event->name, "sched_stat_runtime"))
process_sched_runtime_event(raw, event, cpu, timestamp, thread); process_sched_runtime_event(data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_wakeup")) if (!strcmp(event->name, "sched_wakeup"))
process_sched_wakeup_event(raw, event, cpu, timestamp, thread); process_sched_wakeup_event(data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_wakeup_new")) if (!strcmp(event->name, "sched_wakeup_new"))
process_sched_wakeup_event(raw, event, cpu, timestamp, thread); process_sched_wakeup_event(data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_process_fork")) if (!strcmp(event->name, "sched_process_fork"))
process_sched_fork_event(raw, event, cpu, timestamp, thread); process_sched_fork_event(data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_process_exit")) if (!strcmp(event->name, "sched_process_exit"))
process_sched_exit_event(event, cpu, timestamp, thread); process_sched_exit_event(event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_migrate_task")) if (!strcmp(event->name, "sched_migrate_task"))
process_sched_migrate_task_event(raw, event, cpu, timestamp, thread); process_sched_migrate_task_event(data, event, cpu, timestamp, thread);
} }
static int process_sample_event(event_t *event) static int process_sample_event(event_t *event)
{ {
struct sample_data data;
struct thread *thread; struct thread *thread;
u64 ip = event->ip.ip;
u64 timestamp = -1;
u32 cpu = -1;
u64 period = 1;
void *more_data = event->ip.__more_data;
if (!(sample_type & PERF_SAMPLE_RAW)) if (!(sample_type & PERF_SAMPLE_RAW))
return 0; return 0;
thread = threads__findnew(event->ip.pid); memset(&data, 0, sizeof(data));
data.time = -1;
data.cpu = -1;
data.period = -1;
if (sample_type & PERF_SAMPLE_TIME) { event__parse_sample(event, sample_type, &data);
timestamp = *(u64 *)more_data;
more_data += sizeof(u64);
}
if (sample_type & PERF_SAMPLE_CPU) {
cpu = *(u32 *)more_data;
more_data += sizeof(u32);
more_data += sizeof(u32); /* reserved */
}
if (sample_type & PERF_SAMPLE_PERIOD) {
period = *(u64 *)more_data;
more_data += sizeof(u64);
}
dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
event->header.misc, event->header.misc,
event->ip.pid, event->ip.tid, data.pid, data.tid,
(void *)(long)ip, (void *)(long)data.ip,
(long long)period); (long long)data.period);
thread = threads__findnew(data.pid);
if (thread == NULL) { if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n", pr_debug("problem processing %d event, skipping it.\n",
event->header.type); event->header.type);
...@@ -1640,10 +1624,10 @@ static int process_sample_event(event_t *event) ...@@ -1640,10 +1624,10 @@ static int process_sample_event(event_t *event)
dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
if (profile_cpu != -1 && profile_cpu != (int) cpu) if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
return 0; return 0;
process_raw_event(event, more_data, cpu, timestamp, thread); process_raw_event(event, data.raw_data, data.cpu, data.time, thread);
return 0; return 0;
} }
...@@ -1724,9 +1708,9 @@ static void __cmd_lat(void) ...@@ -1724,9 +1708,9 @@ static void __cmd_lat(void)
read_events(); read_events();
sort_lat(); sort_lat();
printf("\n -----------------------------------------------------------------------------------------\n"); printf("\n ---------------------------------------------------------------------------------------------------------------\n");
printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n"); printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
printf(" -----------------------------------------------------------------------------------------\n"); printf(" ---------------------------------------------------------------------------------------------------------------\n");
next = rb_first(&sorted_atom_root); next = rb_first(&sorted_atom_root);
...@@ -1902,13 +1886,18 @@ static int __cmd_record(int argc, const char **argv) ...@@ -1902,13 +1886,18 @@ static int __cmd_record(int argc, const char **argv)
int cmd_sched(int argc, const char **argv, const char *prefix __used) int cmd_sched(int argc, const char **argv, const char *prefix __used)
{ {
symbol__init(0);
argc = parse_options(argc, argv, sched_options, sched_usage, argc = parse_options(argc, argv, sched_options, sched_usage,
PARSE_OPT_STOP_AT_NON_OPTION); PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc) if (!argc)
usage_with_options(sched_usage, sched_options); usage_with_options(sched_usage, sched_options);
/*
* Aliased to 'perf trace' for now:
*/
if (!strcmp(argv[0], "trace"))
return cmd_trace(argc, argv, prefix);
symbol__init(0);
if (!strncmp(argv[0], "rec", 3)) { if (!strncmp(argv[0], "rec", 3)) {
return __cmd_record(argc, argv); return __cmd_record(argc, argv);
} else if (!strncmp(argv[0], "lat", 3)) { } else if (!strncmp(argv[0], "lat", 3)) {
...@@ -1932,11 +1921,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) ...@@ -1932,11 +1921,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
usage_with_options(replay_usage, replay_options); usage_with_options(replay_usage, replay_options);
} }
__cmd_replay(); __cmd_replay();
} else if (!strcmp(argv[0], "trace")) {
/*
* Aliased to 'perf trace' for now:
*/
return cmd_trace(argc, argv, prefix);
} else { } else {
usage_with_options(sched_usage, sched_options); usage_with_options(sched_usage, sched_options);
} }
......
...@@ -302,12 +302,11 @@ process_exit_event(event_t *event) ...@@ -302,12 +302,11 @@ process_exit_event(event_t *event)
} }
struct trace_entry { struct trace_entry {
u32 size;
unsigned short type; unsigned short type;
unsigned char flags; unsigned char flags;
unsigned char preempt_count; unsigned char preempt_count;
int pid; int pid;
int tgid; int lock_depth;
}; };
struct power_entry { struct power_entry {
...@@ -484,43 +483,22 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) ...@@ -484,43 +483,22 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
static int static int
process_sample_event(event_t *event) process_sample_event(event_t *event)
{ {
int cursor = 0; struct sample_data data;
u64 addr = 0;
u64 stamp = 0;
u32 cpu = 0;
u32 pid = 0;
struct trace_entry *te; struct trace_entry *te;
if (sample_type & PERF_SAMPLE_IP) memset(&data, 0, sizeof(data));
cursor++;
if (sample_type & PERF_SAMPLE_TID) {
pid = event->sample.array[cursor]>>32;
cursor++;
}
if (sample_type & PERF_SAMPLE_TIME) {
stamp = event->sample.array[cursor++];
if (!first_time || first_time > stamp) event__parse_sample(event, sample_type, &data);
first_time = stamp;
if (last_time < stamp)
last_time = stamp;
if (sample_type & PERF_SAMPLE_TIME) {
if (!first_time || first_time > data.time)
first_time = data.time;
if (last_time < data.time)
last_time = data.time;
} }
if (sample_type & PERF_SAMPLE_ADDR)
addr = event->sample.array[cursor++];
if (sample_type & PERF_SAMPLE_ID)
cursor++;
if (sample_type & PERF_SAMPLE_STREAM_ID)
cursor++;
if (sample_type & PERF_SAMPLE_CPU)
cpu = event->sample.array[cursor++] & 0xFFFFFFFF;
if (sample_type & PERF_SAMPLE_PERIOD)
cursor++;
te = (void *)&event->sample.array[cursor];
if (sample_type & PERF_SAMPLE_RAW && te->size > 0) { te = (void *)data.raw_data;
if (sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
char *event_str; char *event_str;
struct power_entry *pe; struct power_entry *pe;
...@@ -532,19 +510,19 @@ process_sample_event(event_t *event) ...@@ -532,19 +510,19 @@ process_sample_event(event_t *event)
return 0; return 0;
if (strcmp(event_str, "power:power_start") == 0) if (strcmp(event_str, "power:power_start") == 0)
c_state_start(cpu, stamp, pe->value); c_state_start(data.cpu, data.time, pe->value);
if (strcmp(event_str, "power:power_end") == 0) if (strcmp(event_str, "power:power_end") == 0)
c_state_end(cpu, stamp); c_state_end(data.cpu, data.time);
if (strcmp(event_str, "power:power_frequency") == 0) if (strcmp(event_str, "power:power_frequency") == 0)
p_state_change(cpu, stamp, pe->value); p_state_change(data.cpu, data.time, pe->value);
if (strcmp(event_str, "sched:sched_wakeup") == 0) if (strcmp(event_str, "sched:sched_wakeup") == 0)
sched_wakeup(cpu, stamp, pid, te); sched_wakeup(data.cpu, data.time, data.pid, te);
if (strcmp(event_str, "sched:sched_switch") == 0) if (strcmp(event_str, "sched:sched_switch") == 0)
sched_switch(cpu, stamp, te); sched_switch(data.cpu, data.time, te);
} }
return 0; return 0;
} }
......
...@@ -66,58 +66,40 @@ static u64 sample_type; ...@@ -66,58 +66,40 @@ static u64 sample_type;
static int process_sample_event(event_t *event) static int process_sample_event(event_t *event)
{ {
u64 ip = event->ip.ip; struct sample_data data;
u64 timestamp = -1; struct thread *thread;
u32 cpu = -1;
u64 period = 1;
void *more_data = event->ip.__more_data;
struct thread *thread = threads__findnew(event->ip.pid);
if (sample_type & PERF_SAMPLE_TIME) {
timestamp = *(u64 *)more_data;
more_data += sizeof(u64);
}
if (sample_type & PERF_SAMPLE_CPU) { memset(&data, 0, sizeof(data));
cpu = *(u32 *)more_data; data.time = -1;
more_data += sizeof(u32); data.cpu = -1;
more_data += sizeof(u32); /* reserved */ data.period = 1;
}
if (sample_type & PERF_SAMPLE_PERIOD) { event__parse_sample(event, sample_type, &data);
period = *(u64 *)more_data;
more_data += sizeof(u64);
}
dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
event->header.misc, event->header.misc,
event->ip.pid, event->ip.tid, data.pid, data.tid,
(void *)(long)ip, (void *)(long)data.ip,
(long long)period); (long long)data.period);
thread = threads__findnew(event->ip.pid);
if (thread == NULL) { if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n", pr_debug("problem processing %d event, skipping it.\n",
event->header.type); event->header.type);
return -1; return -1;
} }
dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
if (sample_type & PERF_SAMPLE_RAW) { if (sample_type & PERF_SAMPLE_RAW) {
struct {
u32 size;
char data[0];
} *raw = more_data;
/* /*
* FIXME: better resolve from pid from the struct trace_entry * FIXME: better resolve from pid from the struct trace_entry
* field, although it should be the same than this perf * field, although it should be the same than this perf
* event pid * event pid
*/ */
scripting_ops->process_event(cpu, raw->data, raw->size, scripting_ops->process_event(data.cpu, data.raw_data,
timestamp, thread->comm); data.raw_size,
data.time, thread->comm);
} }
event__stats.total += period; event__stats.total += data.period;
return 0; return 0;
} }
......
...@@ -100,11 +100,11 @@ process_event(event_t *event, unsigned long offset, unsigned long head) ...@@ -100,11 +100,11 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
} }
} }
int perf_header__read_build_ids(int input, off_t offset, off_t size) int perf_header__read_build_ids(int input, u64 offset, u64 size)
{ {
struct build_id_event bev; struct build_id_event bev;
char filename[PATH_MAX]; char filename[PATH_MAX];
off_t limit = offset + size; u64 limit = offset + size;
int err = -1; int err = -1;
while (offset < limit) { while (offset < limit) {
......
...@@ -27,6 +27,6 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, ...@@ -27,6 +27,6 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
int full_paths, int full_paths,
int *cwdlen, int *cwdlen,
char **cwd); char **cwd);
int perf_header__read_build_ids(int input, off_t offset, off_t file_size); int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
#endif #endif
...@@ -310,3 +310,70 @@ int event__preprocess_sample(const event_t *self, struct addr_location *al, ...@@ -310,3 +310,70 @@ int event__preprocess_sample(const event_t *self, struct addr_location *al,
al->level == 'H' ? "[hypervisor]" : "<not found>"); al->level == 'H' ? "[hypervisor]" : "<not found>");
return 0; return 0;
} }
int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
{
u64 *array = event->sample.array;
if (type & PERF_SAMPLE_IP) {
data->ip = event->ip.ip;
array++;
}
if (type & PERF_SAMPLE_TID) {
u32 *p = (u32 *)array;
data->pid = p[0];
data->tid = p[1];
array++;
}
if (type & PERF_SAMPLE_TIME) {
data->time = *array;
array++;
}
if (type & PERF_SAMPLE_ADDR) {
data->addr = *array;
array++;
}
if (type & PERF_SAMPLE_ID) {
data->id = *array;
array++;
}
if (type & PERF_SAMPLE_STREAM_ID) {
data->stream_id = *array;
array++;
}
if (type & PERF_SAMPLE_CPU) {
u32 *p = (u32 *)array;
data->cpu = *p;
array++;
}
if (type & PERF_SAMPLE_PERIOD) {
data->period = *array;
array++;
}
if (type & PERF_SAMPLE_READ) {
pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
return -1;
}
if (type & PERF_SAMPLE_CALLCHAIN) {
data->callchain = (struct ip_callchain *)array;
array += 1 + data->callchain->nr;
}
if (type & PERF_SAMPLE_RAW) {
u32 *p = (u32 *)array;
data->raw_size = *p;
p++;
data->raw_data = p;
}
return 0;
}
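event__parse_sample() walks the packed u64 array in flag order, which is what lets every builtin above drop its hand-rolled more_data pointer arithmetic. A cut-down standalone sketch of the same walk for four flags, fed a fabricated record; the PERF_SAMPLE_* bits come from linux/perf_event.h, the struct is an illustrative subset, and little-endian layout is assumed for the packed pid/tid pair:

#include <stdio.h>
#include <stdint.h>
#include <linux/perf_event.h>   /* PERF_SAMPLE_* flag bits */

struct mini_sample {
        uint64_t ip, time, period;
        uint32_t pid, tid;
};

/* Subset of event__parse_sample(): IP, TID, TIME, PERIOD only. */
static void parse_sample(const uint64_t *array, uint64_t type,
                         struct mini_sample *data)
{
        if (type & PERF_SAMPLE_IP)
                data->ip = *array++;
        if (type & PERF_SAMPLE_TID) {
                const uint32_t *p = (const uint32_t *)array;
                data->pid = p[0];
                data->tid = p[1];
                array++;                /* two u32s share one u64 slot */
        }
        if (type & PERF_SAMPLE_TIME)
                data->time = *array++;
        if (type & PERF_SAMPLE_PERIOD)
                data->period = *array++;
}

int main(void)
{
        /* Fabricated record: ip, pid/tid pair, time, period. */
        uint64_t raw[] = {
                0xffffffff81234567ULL,                  /* ip */
                4241ULL | ((uint64_t)4242 << 32),       /* pid, tid (LE) */
                123456789ULL,                           /* time */
                1ULL,                                   /* period */
        };
        struct mini_sample data = { 0 };

        parse_sample(raw, PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                     PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD, &data);
        printf("%d/%d: %p period: %llu\n", data.pid, data.tid,
               (void *)(long)data.ip, (unsigned long long)data.period);
        return 0;
}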
...@@ -56,11 +56,25 @@ struct read_event { ...@@ -56,11 +56,25 @@ struct read_event {
u64 id; u64 id;
}; };
struct sample_event{ struct sample_event {
struct perf_event_header header; struct perf_event_header header;
u64 array[]; u64 array[];
}; };
struct sample_data {
u64 ip;
u32 pid, tid;
u64 time;
u64 addr;
u64 id;
u64 stream_id;
u32 cpu;
u64 period;
struct ip_callchain *callchain;
u32 raw_size;
void *raw_data;
};
#define BUILD_ID_SIZE 20 #define BUILD_ID_SIZE 20
struct build_id_event { struct build_id_event {
...@@ -155,5 +169,6 @@ int event__process_task(event_t *self); ...@@ -155,5 +169,6 @@ int event__process_task(event_t *self);
struct addr_location; struct addr_location;
int event__preprocess_sample(const event_t *self, struct addr_location *al, int event__preprocess_sample(const event_t *self, struct addr_location *al,
symbol_filter_t filter); symbol_filter_t filter);
int event__parse_sample(event_t *event, u64 type, struct sample_data *data);
#endif /* __PERF_RECORD_H */ #endif /* __PERF_RECORD_H */
...@@ -187,7 +187,9 @@ static int do_write(int fd, const void *buf, size_t size) ...@@ -187,7 +187,9 @@ static int do_write(int fd, const void *buf, size_t size)
static int __dsos__write_buildid_table(struct list_head *head, int fd) static int __dsos__write_buildid_table(struct list_head *head, int fd)
{ {
#define NAME_ALIGN 64
struct dso *pos; struct dso *pos;
static const char zero_buf[NAME_ALIGN];
list_for_each_entry(pos, head, node) { list_for_each_entry(pos, head, node) {
int err; int err;
...@@ -197,14 +199,17 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd) ...@@ -197,14 +199,17 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
if (!pos->has_build_id) if (!pos->has_build_id)
continue; continue;
len = pos->long_name_len + 1; len = pos->long_name_len + 1;
len = ALIGN(len, 64); len = ALIGN(len, NAME_ALIGN);
memset(&b, 0, sizeof(b)); memset(&b, 0, sizeof(b));
memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
b.header.size = sizeof(b) + len; b.header.size = sizeof(b) + len;
err = do_write(fd, &b, sizeof(b)); err = do_write(fd, &b, sizeof(b));
if (err < 0) if (err < 0)
return err; return err;
err = do_write(fd, pos->long_name, len); err = do_write(fd, pos->long_name, pos->long_name_len + 1);
if (err < 0)
return err;
err = do_write(fd, zero_buf, len - pos->long_name_len - 1);
if (err < 0) if (err < 0)
return err; return err;
} }
......
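The rewritten loop above writes the real string (long_name_len + 1 bytes) and then explicit zeros, where the old single write sourced all len bytes from pos->long_name and so read past the end of the allocation. A small sketch of the padding arithmetic; ALIGN is reimplemented here (perf gets it from its own headers) and /dev/null stands in for the perf.data file:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define NAME_ALIGN 64
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

/* Write name + NUL, then zeros up to the next NAME_ALIGN boundary. */
static int write_padded_name(int fd, const char *name)
{
        static const char zero_buf[NAME_ALIGN];
        size_t n = strlen(name) + 1;
        size_t aligned = ALIGN(n, NAME_ALIGN);

        printf("%-20s: %zu bytes + %zu padding\n", name, n, aligned - n);
        if (write(fd, name, n) != (ssize_t)n)
                return -1;
        return write(fd, zero_buf, aligned - n) == (ssize_t)(aligned - n)
                ? 0 : -1;
}

int main(void)
{
        int fd = open("/dev/null", O_WRONLY);   /* stand-in for perf.data */

        if (fd < 0)
                return 1;
        write_padded_name(fd, "/lib/libc-2.10.so");
        write_padded_name(fd, "[kernel.kallsyms]");
        close(fd);
        return 0;
}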
...@@ -197,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) ...@@ -197,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
if (id == config) { if (id == config) {
closedir(evt_dir); closedir(evt_dir);
closedir(sys_dir); closedir(sys_dir);
path = zalloc(sizeof(path)); path = zalloc(sizeof(*path));
path->system = malloc(MAX_EVENT_LENGTH); path->system = malloc(MAX_EVENT_LENGTH);
if (!path->system) { if (!path->system) {
free(path); free(path);
...@@ -467,7 +467,6 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags) ...@@ -467,7 +467,6 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
while ((evt_ent = readdir(evt_dir))) { while ((evt_ent = readdir(evt_dir))) {
char event_opt[MAX_EVOPT_LEN + 1]; char event_opt[MAX_EVOPT_LEN + 1];
int len; int len;
unsigned int rem = MAX_EVOPT_LEN;
if (!strcmp(evt_ent->d_name, ".") if (!strcmp(evt_ent->d_name, ".")
|| !strcmp(evt_ent->d_name, "..") || !strcmp(evt_ent->d_name, "..")
...@@ -475,20 +474,12 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags) ...@@ -475,20 +474,12 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
|| !strcmp(evt_ent->d_name, "filter")) || !strcmp(evt_ent->d_name, "filter"))
continue; continue;
len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s", sys_name, len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
evt_ent->d_name); evt_ent->d_name, flags ? ":" : "",
flags ?: "");
if (len < 0) if (len < 0)
return EVT_FAILED; return EVT_FAILED;
rem -= len;
if (flags) {
if (rem < strlen(flags) + 1)
return EVT_FAILED;
strcat(event_opt, ":");
strcat(event_opt, flags);
}
if (parse_events(NULL, event_opt, 0)) if (parse_events(NULL, event_opt, 0))
return EVT_FAILED; return EVT_FAILED;
} }
......
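The zalloc() change above is the classic sizeof-on-a-pointer slip: sizeof(path) is the size of the pointer itself, so the old call allocated a handful of bytes and every later field store overflowed. A compile-and-run illustration; the struct layout is abridged from perf's tracepoint_path for the sketch:

#include <stdio.h>
#include <stdlib.h>

struct tracepoint_path {
        char *system;
        char *name;
        struct tracepoint_path *next;
};

int main(void)
{
        struct tracepoint_path *path;

        printf("sizeof(path)  = %zu (just a pointer)\n", sizeof(path));
        printf("sizeof(*path) = %zu (the whole struct)\n", sizeof(*path));

        /* Wrong: allocates pointer-sized storage; path->next overflows. */
        /* path = calloc(1, sizeof(path)); */

        /* Right: allocate what the object actually needs. */
        path = calloc(1, sizeof(*path));
        free(path);
        return 0;
}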
...@@ -430,6 +430,9 @@ int usage_with_options_internal(const char * const *usagestr, ...@@ -430,6 +430,9 @@ int usage_with_options_internal(const char * const *usagestr,
pos = fprintf(stderr, " "); pos = fprintf(stderr, " ");
if (opts->short_name) if (opts->short_name)
pos += fprintf(stderr, "-%c", opts->short_name); pos += fprintf(stderr, "-%c", opts->short_name);
else
pos += fprintf(stderr, " ");
if (opts->long_name && opts->short_name) if (opts->long_name && opts->short_name)
pos += fprintf(stderr, ", "); pos += fprintf(stderr, ", ");
if (opts->long_name) if (opts->long_name)
......
...@@ -47,6 +47,9 @@ ...@@ -47,6 +47,9 @@
#define semantic_error(msg ...) die("Semantic error :" msg) #define semantic_error(msg ...) die("Semantic error :" msg)
/* If there is no space to write, returns -E2BIG. */ /* If there is no space to write, returns -E2BIG. */
static int e_snprintf(char *str, size_t size, const char *format, ...)
__attribute__((format(printf, 3, 4)));
static int e_snprintf(char *str, size_t size, const char *format, ...) static int e_snprintf(char *str, size_t size, const char *format, ...)
{ {
int ret; int ret;
...@@ -258,7 +261,7 @@ int synthesize_perf_probe_event(struct probe_point *pp) ...@@ -258,7 +261,7 @@ int synthesize_perf_probe_event(struct probe_point *pp)
ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function, ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function,
offs, pp->retprobe ? "%return" : "", line); offs, pp->retprobe ? "%return" : "", line);
else else
ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->file, line); ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", pp->file, line);
if (ret <= 0) if (ret <= 0)
goto error; goto error;
len = ret; len = ret;
...@@ -373,14 +376,32 @@ static void clear_probe_point(struct probe_point *pp) ...@@ -373,14 +376,32 @@ static void clear_probe_point(struct probe_point *pp)
free(pp->args); free(pp->args);
for (i = 0; i < pp->found; i++) for (i = 0; i < pp->found; i++)
free(pp->probes[i]); free(pp->probes[i]);
memset(pp, 0, sizeof(pp)); memset(pp, 0, sizeof(*pp));
}
/* Show an event */
static void show_perf_probe_event(const char *group, const char *event,
const char *place, struct probe_point *pp)
{
int i;
char buf[128];
e_snprintf(buf, 128, "%s:%s", group, event);
printf(" %-40s (on %s", buf, place);
if (pp->nr_args > 0) {
printf(" with");
for (i = 0; i < pp->nr_args; i++)
printf(" %s", pp->args[i]);
}
printf(")\n");
} }
/* List up current perf-probe events */ /* List up current perf-probe events */
void show_perf_probe_events(void) void show_perf_probe_events(void)
{ {
unsigned int i; unsigned int i;
int fd; int fd, nr;
char *group, *event; char *group, *event;
struct probe_point pp; struct probe_point pp;
struct strlist *rawlist; struct strlist *rawlist;
...@@ -393,8 +414,13 @@ void show_perf_probe_events(void) ...@@ -393,8 +414,13 @@ void show_perf_probe_events(void)
for (i = 0; i < strlist__nr_entries(rawlist); i++) { for (i = 0; i < strlist__nr_entries(rawlist); i++) {
ent = strlist__entry(rawlist, i); ent = strlist__entry(rawlist, i);
parse_trace_kprobe_event(ent->s, &group, &event, &pp); parse_trace_kprobe_event(ent->s, &group, &event, &pp);
/* Synthesize only event probe point */
nr = pp.nr_args;
pp.nr_args = 0;
synthesize_perf_probe_event(&pp); synthesize_perf_probe_event(&pp);
printf("[%s:%s]\t%s\n", group, event, pp.probes[0]); pp.nr_args = nr;
/* Show an event */
show_perf_probe_event(group, event, pp.probes[0], &pp);
free(group); free(group);
free(event); free(event);
clear_probe_point(&pp); clear_probe_point(&pp);
...@@ -404,21 +430,28 @@ void show_perf_probe_events(void) ...@@ -404,21 +430,28 @@ void show_perf_probe_events(void)
} }
/* Get current perf-probe event names */ /* Get current perf-probe event names */
static struct strlist *get_perf_event_names(int fd) static struct strlist *get_perf_event_names(int fd, bool include_group)
{ {
unsigned int i; unsigned int i;
char *group, *event; char *group, *event;
char buf[128];
struct strlist *sl, *rawlist; struct strlist *sl, *rawlist;
struct str_node *ent; struct str_node *ent;
rawlist = get_trace_kprobe_event_rawlist(fd); rawlist = get_trace_kprobe_event_rawlist(fd);
sl = strlist__new(false, NULL); sl = strlist__new(true, NULL);
for (i = 0; i < strlist__nr_entries(rawlist); i++) { for (i = 0; i < strlist__nr_entries(rawlist); i++) {
ent = strlist__entry(rawlist, i); ent = strlist__entry(rawlist, i);
parse_trace_kprobe_event(ent->s, &group, &event, NULL); parse_trace_kprobe_event(ent->s, &group, &event, NULL);
strlist__add(sl, event); if (include_group) {
if (e_snprintf(buf, 128, "%s:%s", group, event) < 0)
die("Failed to copy group:event name.");
strlist__add(sl, buf);
} else
strlist__add(sl, event);
free(group); free(group);
free(event);
} }
strlist__delete(rawlist); strlist__delete(rawlist);
...@@ -426,24 +459,30 @@ static struct strlist *get_perf_event_names(int fd) ...@@ -426,24 +459,30 @@ static struct strlist *get_perf_event_names(int fd)
return sl; return sl;
} }
static int write_trace_kprobe_event(int fd, const char *buf) static void write_trace_kprobe_event(int fd, const char *buf)
{ {
int ret; int ret;
pr_debug("Writing event: %s\n", buf);
ret = write(fd, buf, strlen(buf)); ret = write(fd, buf, strlen(buf));
if (ret <= 0) if (ret <= 0)
die("Failed to create event."); die("Failed to write event: %s", strerror(errno));
else
printf("Added new event: %s\n", buf);
return ret;
} }
static void get_new_event_name(char *buf, size_t len, const char *base, static void get_new_event_name(char *buf, size_t len, const char *base,
struct strlist *namelist) struct strlist *namelist)
{ {
int i, ret; int i, ret;
for (i = 0; i < MAX_EVENT_INDEX; i++) {
/* Try no suffix */
ret = e_snprintf(buf, len, "%s", base);
if (ret < 0)
die("snprintf() failed: %s", strerror(-ret));
if (!strlist__has_entry(namelist, buf))
return;
/* Try to add suffix */
for (i = 1; i < MAX_EVENT_INDEX; i++) {
ret = e_snprintf(buf, len, "%s_%d", base, i); ret = e_snprintf(buf, len, "%s_%d", base, i);
if (ret < 0) if (ret < 0)
die("snprintf() failed: %s", strerror(-ret)); die("snprintf() failed: %s", strerror(-ret));
...@@ -464,7 +503,7 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes) ...@@ -464,7 +503,7 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
fd = open_kprobe_events(O_RDWR, O_APPEND); fd = open_kprobe_events(O_RDWR, O_APPEND);
/* Get current event names */ /* Get current event names */
namelist = get_perf_event_names(fd); namelist = get_perf_event_names(fd, false);
for (j = 0; j < nr_probes; j++) { for (j = 0; j < nr_probes; j++) {
pp = probes + j; pp = probes + j;
...@@ -476,9 +515,73 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes) ...@@ -476,9 +515,73 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
PERFPROBE_GROUP, event, PERFPROBE_GROUP, event,
pp->probes[i]); pp->probes[i]);
write_trace_kprobe_event(fd, buf); write_trace_kprobe_event(fd, buf);
printf("Added new event:\n");
/* Get the first parameter (probe-point) */
sscanf(pp->probes[i], "%s", buf);
show_perf_probe_event(PERFPROBE_GROUP, event,
buf, pp);
/* Add added event name to namelist */ /* Add added event name to namelist */
strlist__add(namelist, event); strlist__add(namelist, event);
} }
} }
/* Show how to use the event. */
printf("\nYou can now use it on all perf tools, such as:\n\n");
printf("\tperf record -e %s:%s -a sleep 1\n\n", PERFPROBE_GROUP, event);
strlist__delete(namelist);
close(fd);
}
static void del_trace_kprobe_event(int fd, const char *group,
const char *event, struct strlist *namelist)
{
char buf[128];
if (e_snprintf(buf, 128, "%s:%s", group, event) < 0)
die("Failed to copy event.");
if (!strlist__has_entry(namelist, buf)) {
pr_warning("Warning: event \"%s\" is not found.\n", buf);
return;
}
/* Convert from perf-probe event to trace-kprobe event */
if (e_snprintf(buf, 128, "-:%s/%s", group, event) < 0)
die("Failed to copy event.");
write_trace_kprobe_event(fd, buf);
printf("Remove event: %s:%s\n", group, event);
}
void del_trace_kprobe_events(struct strlist *dellist)
{
int fd;
unsigned int i;
const char *group, *event;
char *p, *str;
struct str_node *ent;
struct strlist *namelist;
fd = open_kprobe_events(O_RDWR, O_APPEND);
/* Get current event names */
namelist = get_perf_event_names(fd, true);
for (i = 0; i < strlist__nr_entries(dellist); i++) {
ent = strlist__entry(dellist, i);
str = strdup(ent->s);
if (!str)
die("Failed to copy event.");
p = strchr(str, ':');
if (p) {
group = str;
*p = '\0';
event = p + 1;
} else {
group = PERFPROBE_GROUP;
event = str;
}
del_trace_kprobe_event(fd, group, event, namelist);
free(str);
}
strlist__delete(namelist);
close(fd); close(fd);
} }
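Under the hood this drives the kernel's kprobe_events interface: creation lines start with "p" or "r", and a line beginning with "-:" deletes the named event, which is exactly the "-:%s/%s" string built above. A minimal sketch of the delete path; the debugfs mount point is the usual default, the probe name is illustrative, and the write needs root:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        const char *cmd = "-:probe/schedule\n";         /* delete group/event */
        int fd = open("/sys/kernel/debug/tracing/kprobe_events",
                      O_WRONLY | O_APPEND);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, cmd, strlen(cmd)) < 0)
                perror("write");
        close(fd);
        return 0;
}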
@@ -10,6 +10,7 @@
 extern void parse_trace_kprobe_event(const char *str, char **group,
				      char **event, struct probe_point *pp);
 extern int synthesize_trace_kprobe_event(struct probe_point *pp);
 extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes);
+extern void del_trace_kprobe_events(struct strlist *dellist);
 extern void show_perf_probe_events(void);
 
 /* Maximum index number of event-name postfix */
......
@@ -106,7 +106,7 @@ static int strtailcmp(const char *s1, const char *s2)
 {
 	int i1 = strlen(s1);
 	int i2 = strlen(s2);
-	while (--i1 > 0 && --i2 > 0) {
+	while (--i1 >= 0 && --i2 >= 0) {
 		if (s1[i1] != s2[i2])
 			return s1[i1] - s2[i2];
 	}
......
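
The strtailcmp() change above is a textbook off-by-one: with "--i1 > 0" the loop exits before index 0 is ever compared, so one-character tails such as "a" vs "b" compared equal. A small self-contained test of the fixed logic (a rewrite for illustration, not the perf source):

#include <assert.h>
#include <string.h>

static int strtailcmp(const char *s1, const char *s2)
{
	int i1 = strlen(s1);
	int i2 = strlen(s2);

	/* Compare from the last byte backwards; >= 0 includes index 0. */
	while (--i1 >= 0 && --i2 >= 0) {
		if (s1[i1] != s2[i2])
			return s1[i1] - s2[i2];
	}
	return 0;
}

int main(void)
{
	assert(strtailcmp("a", "b") != 0);	 /* old "> 0" loop returned 0 here */
	assert(strtailcmp("foo.c", "o.c") == 0); /* matching tail */
	return 0;
}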
@@ -938,8 +938,9 @@ static bool __dsos__read_build_ids(struct list_head *head)
 bool dsos__read_build_ids(void)
 {
-	return __dsos__read_build_ids(&dsos__kernel) ||
-	       __dsos__read_build_ids(&dsos__user);
+	bool kbuildids = __dsos__read_build_ids(&dsos__kernel),
+	     ubuildids = __dsos__read_build_ids(&dsos__user);
+	return kbuildids || ubuildids;
 }
 
 /*
......
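
The dsos__read_build_ids() rewrite looks cosmetic but is not: "||" short-circuits, so the user DSO list was never read once the kernel list succeeded. A toy illustration of the hazard, with placeholder names:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for __dsos__read_build_ids(): returns true and has a
 * side effect that must happen for both the kernel and user lists. */
static bool read_list(const char *name)
{
	printf("reading %s build-ids\n", name);
	return true;
}

int main(void)
{
	/* Buggy shape: read_list("user") is skipped entirely. */
	bool buggy = read_list("kernel") || read_list("user");

	/* Fixed shape: both calls always run before the OR. */
	bool k = read_list("kernel");
	bool u = read_list("user");

	return (buggy && (k || u)) ? 0 : 1;
}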
@@ -177,7 +177,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
 		func_count++;
 	}
 
-	func_list = malloc_or_die(sizeof(*func_list) * func_count + 1);
+	func_list = malloc_or_die(sizeof(*func_list) * (func_count + 1));
 
 	i = 0;
 	while (list) {
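
The parse_proc_kallsyms() fix is pure operator precedence: sizeof(*p) * n + 1 allocates n elements plus a single byte, while the intended sizeof(*p) * (n + 1) allocates a whole extra slot for the terminating entry. A sketch with a hypothetical element type:

#include <stdio.h>

/* Hypothetical element type standing in for perf's func_list entry. */
struct entry { char *name; unsigned long addr; };

int main(void)
{
	unsigned int func_count = 8;

	/* Old: N elements plus ONE BYTE, too small for the sentinel. */
	printf("buggy size: %zu\n", sizeof(struct entry) * func_count + 1);
	/* New: N + 1 whole elements, leaving room for the terminator. */
	printf("fixed size: %zu\n", sizeof(struct entry) * (func_count + 1));
	return 0;
}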
@@ -1477,7 +1477,7 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok)
 			goto out_free;
 
 		field = malloc_or_die(sizeof(*field));
-		memset(field, 0, sizeof(field));
+		memset(field, 0, sizeof(*field));
 
 		value = arg_eval(arg);
 		field->value = strdup(value);
......
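
The process_fields() change fixes the classic sizeof-of-a-pointer mistake: sizeof(field) measures the pointer, not the structure, so most of the freshly allocated field stayed uninitialized. A minimal demonstration (the struct layout here is assumed):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Layout is illustrative; the real struct lives in trace-event-parse. */
struct print_flag_sym { void *next; char *value; char *str; };

int main(void)
{
	struct print_flag_sym *field = malloc(sizeof(*field));

	if (!field)
		return 1;
	/* sizeof(field) is the pointer size (4 or 8 bytes), so the old
	 * call zeroed only part of the struct; sizeof(*field) covers it. */
	printf("old cleared %zu bytes, new clears %zu\n",
	       sizeof(field), sizeof(*field));
	memset(field, 0, sizeof(*field));	/* the corrected call */
	free(field);
	return 0;
}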
@@ -32,9 +32,6 @@
 
 void xs_init(pTHX);
 
-void boot_Perf__Trace__Context(pTHX_ CV *cv);
-void boot_DynaLoader(pTHX_ CV *cv);
-
 void xs_init(pTHX)
 {
 	const char *file = __FILE__;
@@ -573,26 +570,72 @@ struct scripting_ops perl_scripting_ops = {
 	.generate_script = perl_generate_script,
 };
 
-#ifdef NO_LIBPERL
-void setup_perl_scripting(void)
+static void print_unsupported_msg(void)
 {
 	fprintf(stderr, "Perl scripting not supported."
-		" Install libperl and rebuild perf to enable it. e.g. "
-		"apt-get install libperl-dev (ubuntu), yum install "
-		"perl-ExtUtils-Embed (Fedora), etc.\n");
+		" Install libperl and rebuild perf to enable it.\n"
+		"For example:\n # apt-get install libperl-dev (ubuntu)"
+		"\n # yum install perl-ExtUtils-Embed (Fedora)"
+		"\n etc.\n");
 }
-#else
-void setup_perl_scripting(void)
+
+static int perl_start_script_unsupported(const char *script __unused)
+{
+	print_unsupported_msg();
+	return -1;
+}
+
+static int perl_stop_script_unsupported(void)
+{
+	return 0;
+}
+
+static void perl_process_event_unsupported(int cpu __unused,
+					   void *data __unused,
+					   int size __unused,
+					   unsigned long long nsecs __unused,
+					   char *comm __unused)
+{
+}
+
+static int perl_generate_script_unsupported(const char *outfile __unused)
+{
+	print_unsupported_msg();
+	return -1;
+}
+
+struct scripting_ops perl_scripting_unsupported_ops = {
+	.name = "Perl",
+	.start_script = perl_start_script_unsupported,
+	.stop_script = perl_stop_script_unsupported,
+	.process_event = perl_process_event_unsupported,
+	.generate_script = perl_generate_script_unsupported,
+};
+
+static void register_perl_scripting(struct scripting_ops *scripting_ops)
 {
 	int err;
-	err = script_spec_register("Perl", &perl_scripting_ops);
+
+	err = script_spec_register("Perl", scripting_ops);
 	if (err)
 		die("error registering Perl script extension");
 
-	err = script_spec_register("pl", &perl_scripting_ops);
+	err = script_spec_register("pl", scripting_ops);
 	if (err)
 		die("error registering pl script extension");
 
 	scripting_context = malloc(sizeof(struct scripting_context));
 }
+
+#ifdef NO_LIBPERL
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_unsupported_ops);
}
+#else
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_ops);
+}
 #endif
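
The restructuring above is the stub-vtable idiom: when perf is built with NO_LIBPERL, a fully formed perl_scripting_unsupported_ops table is registered whose callbacks print an install hint, so "perf trace -s foo.pl" fails loudly rather than silently. The same pattern in miniature, with all names hypothetical:

#include <stdio.h>

/* Two same-shaped vtables, picked at build time. */
struct scripting_ops {
	const char *name;
	int (*start_script)(const char *path);
};

static int start_real(const char *path)
{
	printf("running %s\n", path);
	return 0;
}

static int start_stub(const char *path)
{
	(void)path;
	fprintf(stderr, "scripting not compiled in; rebuild with the library\n");
	return -1;
}

static struct scripting_ops real_ops = { "real", start_real };
static struct scripting_ops stub_ops = { "stub", start_stub };

int main(void)
{
#ifdef NO_LIB
	struct scripting_ops *ops = &stub_ops;	/* graceful failure path */
#else
	struct scripting_ops *ops = &real_ops;
#endif
	return ops->start_script("example.pl") ? 1 : 0;
}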
@@ -34,9 +34,13 @@ typedef int INTERP;
 #define dXSUB_SYS
 #define pTHX_
 static inline void newXS(const char *a, void *b, const char *c) {}
+static void boot_Perf__Trace__Context(pTHX_ CV *cv) {}
+static void boot_DynaLoader(pTHX_ CV *cv) {}
 #else
 #include <EXTERN.h>
 #include <perl.h>
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
 typedef PerlInterpreter * INTERP;
 #endif
......
@@ -145,8 +145,9 @@ static void read_proc_kallsyms(void)
 	if (!size)
 		return;
 
-	buf = malloc_or_die(size);
+	buf = malloc_or_die(size + 1);
 	read_or_die(buf, size);
+	buf[size] = '\0';
 
 	parse_proc_kallsyms(buf, size);
......
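
The read_proc_kallsyms() fix matters because the buffer is handed to string routines downstream; without the extra byte and the explicit NUL, parsing could read past the heap allocation. A self-contained sketch of the safe read-then-terminate shape (read_all() is an illustrative helper, not perf's API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Read exactly `size` bytes and terminate them, as the fix does. */
static char *read_all(FILE *fp, size_t size)
{
	char *buf = malloc(size + 1);		/* +1 for the terminator */

	if (!buf || fread(buf, 1, size, fp) != size) {
		free(buf);
		return NULL;
	}
	buf[size] = '\0';	/* now safe for strchr()/strtok()/sscanf() */
	return buf;
}

int main(void)
{
	const char *line = "c0ffee t coffee_handler";
	FILE *fp = tmpfile();
	char *s;

	if (!fp)
		return 1;
	fputs(line, fp);
	rewind(fp);
	s = read_all(fp, strlen(line));
	if (s)
		printf("%s\n", s);
	free(s);
	fclose(fp);
	return 0;
}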