Commit a042e261 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-fixes-for-linus' of...

Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...
parents f66dd539 e25804a0
...@@ -121,6 +121,7 @@ ...@@ -121,6 +121,7 @@
#define MSR_AMD64_IBSDCLINAD 0xc0011038 #define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 #define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
/* Fam 10h MSRs */ /* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
......
...@@ -122,6 +122,7 @@ union cpuid10_edx { ...@@ -122,6 +122,7 @@ union cpuid10_edx {
#define IBS_OP_VAL (1ULL<<18) #define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17) #define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void); extern void init_hw_perf_events(void);
......
...@@ -237,6 +237,7 @@ struct x86_pmu { ...@@ -237,6 +237,7 @@ struct x86_pmu {
* Intel DebugStore bits * Intel DebugStore bits
*/ */
int bts, pebs; int bts, pebs;
int bts_active, pebs_active;
int pebs_record_size; int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs); void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints; struct event_constraint *pebs_constraints;
...@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {} ...@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}
#endif #endif
static int reserve_ds_buffers(void); static void reserve_ds_buffers(void);
static void release_ds_buffers(void); static void release_ds_buffers(void);
static void hw_perf_event_destroy(struct perf_event *event) static void hw_perf_event_destroy(struct perf_event *event)
...@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event) ...@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
(hwc->sample_period == 1)) { (hwc->sample_period == 1)) {
/* BTS is not supported by this architecture. */ /* BTS is not supported by this architecture. */
if (!x86_pmu.bts) if (!x86_pmu.bts_active)
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */ /* BTS is currently only allowed for user-mode. */
...@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event) ...@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
int precise = 0; int precise = 0;
/* Support for constant skid */ /* Support for constant skid */
if (x86_pmu.pebs) if (x86_pmu.pebs_active) {
precise++; precise++;
/* Support for IP fixup */ /* Support for IP fixup */
if (x86_pmu.lbr_nr) if (x86_pmu.lbr_nr)
precise++; precise++;
}
if (event->attr.precise_ip > precise) if (event->attr.precise_ip > precise)
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event) ...@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
if (atomic_read(&active_events) == 0) { if (atomic_read(&active_events) == 0) {
if (!reserve_pmc_hardware()) if (!reserve_pmc_hardware())
err = -EBUSY; err = -EBUSY;
else { else
err = reserve_ds_buffers(); reserve_ds_buffers();
if (err)
release_pmc_hardware();
}
} }
if (!err) if (!err)
atomic_inc(&active_events); atomic_inc(&active_events);
......
...@@ -74,58 +74,57 @@ static void fini_debug_store_on_cpu(int cpu) ...@@ -74,58 +74,57 @@ static void fini_debug_store_on_cpu(int cpu)
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
} }
static void release_ds_buffers(void) static int alloc_pebs_buffer(int cpu)
{ {
int cpu; struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
if (!x86_pmu.bts && !x86_pmu.pebs) int max, thresh = 1; /* always use a single PEBS record */
return; void *buffer;
get_online_cpus();
for_each_online_cpu(cpu) if (!x86_pmu.pebs)
fini_debug_store_on_cpu(cpu); return 0;
for_each_possible_cpu(cpu) { buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; if (unlikely(!buffer))
return -ENOMEM;
if (!ds) max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
continue;
per_cpu(cpu_hw_events, cpu).ds = NULL; ds->pebs_buffer_base = (u64)(unsigned long)buffer;
ds->pebs_index = ds->pebs_buffer_base;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
kfree((void *)(unsigned long)ds->pebs_buffer_base); ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
kfree((void *)(unsigned long)ds->bts_buffer_base); thresh * x86_pmu.pebs_record_size;
kfree(ds);
}
put_online_cpus(); return 0;
} }
static int reserve_ds_buffers(void) static void release_pebs_buffer(int cpu)
{ {
int cpu, err = 0; struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!x86_pmu.bts && !x86_pmu.pebs) if (!ds || !x86_pmu.pebs)
return 0; return;
get_online_cpus(); kfree((void *)(unsigned long)ds->pebs_buffer_base);
ds->pebs_buffer_base = 0;
}
for_each_possible_cpu(cpu) { static int alloc_bts_buffer(int cpu)
struct debug_store *ds; {
void *buffer; struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh; int max, thresh;
void *buffer;
err = -ENOMEM; if (!x86_pmu.bts)
ds = kzalloc(sizeof(*ds), GFP_KERNEL); return 0;
if (unlikely(!ds))
break;
per_cpu(cpu_hw_events, cpu).ds = ds;
if (x86_pmu.bts) { buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
if (unlikely(!buffer)) if (unlikely(!buffer))
break; return -ENOMEM;
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
thresh = max / 16; thresh = max / 16;
...@@ -136,39 +135,125 @@ static int reserve_ds_buffers(void) ...@@ -136,39 +135,125 @@ static int reserve_ds_buffers(void)
max * BTS_RECORD_SIZE; max * BTS_RECORD_SIZE;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum - ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
thresh * BTS_RECORD_SIZE; thresh * BTS_RECORD_SIZE;
return 0;
}
static void release_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds || !x86_pmu.bts)
return;
kfree((void *)(unsigned long)ds->bts_buffer_base);
ds->bts_buffer_base = 0;
}
static int alloc_ds_buffer(int cpu)
{
int node = cpu_to_node(cpu);
struct debug_store *ds;
ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
if (unlikely(!ds))
return -ENOMEM;
per_cpu(cpu_hw_events, cpu).ds = ds;
return 0;
}
static void release_ds_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds)
return;
per_cpu(cpu_hw_events, cpu).ds = NULL;
kfree(ds);
}
static void release_ds_buffers(void)
{
int cpu;
if (!x86_pmu.bts && !x86_pmu.pebs)
return;
get_online_cpus();
for_each_online_cpu(cpu)
fini_debug_store_on_cpu(cpu);
for_each_possible_cpu(cpu) {
release_pebs_buffer(cpu);
release_bts_buffer(cpu);
release_ds_buffer(cpu);
} }
put_online_cpus();
}
if (x86_pmu.pebs) { static void reserve_ds_buffers(void)
buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); {
if (unlikely(!buffer)) int bts_err = 0, pebs_err = 0;
break; int cpu;
max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; x86_pmu.bts_active = 0;
x86_pmu.pebs_active = 0;
ds->pebs_buffer_base = (u64)(unsigned long)buffer; if (!x86_pmu.bts && !x86_pmu.pebs)
ds->pebs_index = ds->pebs_buffer_base; return;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size; if (!x86_pmu.bts)
/* bts_err = 1;
* Always use single record PEBS
*/ if (!x86_pmu.pebs)
ds->pebs_interrupt_threshold = ds->pebs_buffer_base + pebs_err = 1;
x86_pmu.pebs_record_size;
get_online_cpus();
for_each_possible_cpu(cpu) {
if (alloc_ds_buffer(cpu)) {
bts_err = 1;
pebs_err = 1;
}
if (!bts_err && alloc_bts_buffer(cpu))
bts_err = 1;
if (!pebs_err && alloc_pebs_buffer(cpu))
pebs_err = 1;
if (bts_err && pebs_err)
break;
}
if (bts_err) {
for_each_possible_cpu(cpu)
release_bts_buffer(cpu);
} }
err = 0; if (pebs_err) {
for_each_possible_cpu(cpu)
release_pebs_buffer(cpu);
} }
if (err) if (bts_err && pebs_err) {
release_ds_buffers(); for_each_possible_cpu(cpu)
else { release_ds_buffer(cpu);
} else {
if (x86_pmu.bts && !bts_err)
x86_pmu.bts_active = 1;
if (x86_pmu.pebs && !pebs_err)
x86_pmu.pebs_active = 1;
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
init_debug_store_on_cpu(cpu); init_debug_store_on_cpu(cpu);
} }
put_online_cpus(); put_online_cpus();
return err;
} }
/* /*
...@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void) ...@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
if (!event) if (!event)
return 0; return 0;
if (!ds) if (!x86_pmu.bts_active)
return 0; return 0;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
...@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) ...@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
struct pebs_record_core *at, *top; struct pebs_record_core *at, *top;
int n; int n;
if (!ds || !x86_pmu.pebs) if (!x86_pmu.pebs_active)
return; return;
at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
...@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ...@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
u64 status = 0; u64 status = 0;
int bit, n; int bit, n;
if (!ds || !x86_pmu.pebs) if (!x86_pmu.pebs_active)
return; return;
at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
...@@ -630,9 +715,8 @@ static void intel_ds_init(void) ...@@ -630,9 +715,8 @@ static void intel_ds_init(void)
#else /* CONFIG_CPU_SUP_INTEL */ #else /* CONFIG_CPU_SUP_INTEL */
static int reserve_ds_buffers(void) static void reserve_ds_buffers(void)
{ {
return 0;
} }
static void release_ds_buffers(void) static void release_ds_buffers(void)
......
...@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops) ...@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
case 0x11: case 0x11:
cpu_type = "x86-64/family11h"; cpu_type = "x86-64/family11h";
break; break;
case 0x12:
cpu_type = "x86-64/family12h";
break;
case 0x14:
cpu_type = "x86-64/family14h";
break;
default: default:
return -ENODEV; return -ENODEV;
} }
......
...@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; ...@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
static u32 ibs_caps; static u32 ibs_caps;
struct op_ibs_config { struct ibs_config {
unsigned long op_enabled; unsigned long op_enabled;
unsigned long fetch_enabled; unsigned long fetch_enabled;
unsigned long max_cnt_fetch; unsigned long max_cnt_fetch;
unsigned long max_cnt_op; unsigned long max_cnt_op;
unsigned long rand_en; unsigned long rand_en;
unsigned long dispatched_ops; unsigned long dispatched_ops;
unsigned long branch_target;
}; };
static struct op_ibs_config ibs_config; struct ibs_state {
static u64 ibs_op_ctl; u64 ibs_op_ctl;
int branch_target;
unsigned long sample_size;
};
static struct ibs_config ibs_config;
static struct ibs_state ibs_state;
/* /*
* IBS cpuid feature detection * IBS cpuid feature detection
...@@ -71,8 +78,16 @@ static u64 ibs_op_ctl; ...@@ -71,8 +78,16 @@ static u64 ibs_op_ctl;
* bit 0 is used to indicate the existence of IBS. * bit 0 is used to indicate the existence of IBS.
*/ */
#define IBS_CAPS_AVAIL (1U<<0) #define IBS_CAPS_AVAIL (1U<<0)
#define IBS_CAPS_FETCHSAM (1U<<1)
#define IBS_CAPS_OPSAM (1U<<2)
#define IBS_CAPS_RDWROPCNT (1U<<3) #define IBS_CAPS_RDWROPCNT (1U<<3)
#define IBS_CAPS_OPCNT (1U<<4) #define IBS_CAPS_OPCNT (1U<<4)
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
| IBS_CAPS_OPSAM)
/* /*
* IBS APIC setup * IBS APIC setup
...@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void) ...@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void)
/* check IBS cpuid feature flags */ /* check IBS cpuid feature flags */
max_level = cpuid_eax(0x80000000); max_level = cpuid_eax(0x80000000);
if (max_level < IBS_CPUID_FEATURES) if (max_level < IBS_CPUID_FEATURES)
return IBS_CAPS_AVAIL; return IBS_CAPS_DEFAULT;
ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
if (!(ibs_caps & IBS_CAPS_AVAIL)) if (!(ibs_caps & IBS_CAPS_AVAIL))
/* cpuid flags not valid */ /* cpuid flags not valid */
return IBS_CAPS_AVAIL; return IBS_CAPS_DEFAULT;
return ibs_caps; return ibs_caps;
} }
...@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs, ...@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,
rdmsrl(MSR_AMD64_IBSOPCTL, ctl); rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
if (ctl & IBS_OP_VAL) { if (ctl & IBS_OP_VAL) {
rdmsrl(MSR_AMD64_IBSOPRIP, val); rdmsrl(MSR_AMD64_IBSOPRIP, val);
oprofile_write_reserve(&entry, regs, val, oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
IBS_OP_CODE, IBS_OP_SIZE); ibs_state.sample_size);
oprofile_add_data64(&entry, val); oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA, val); rdmsrl(MSR_AMD64_IBSOPDATA, val);
oprofile_add_data64(&entry, val); oprofile_add_data64(&entry, val);
...@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs, ...@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,
oprofile_add_data64(&entry, val); oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
oprofile_add_data64(&entry, val); oprofile_add_data64(&entry, val);
if (ibs_state.branch_target) {
rdmsrl(MSR_AMD64_IBSBRTARGET, val);
oprofile_add_data(&entry, (unsigned long)val);
}
oprofile_write_commit(&entry); oprofile_write_commit(&entry);
/* reenable the IRQ */ /* reenable the IRQ */
ctl = op_amd_randomize_ibs_op(ibs_op_ctl); ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, ctl); wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
} }
} }
...@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void) ...@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void)
if (!ibs_caps) if (!ibs_caps)
return; return;
memset(&ibs_state, 0, sizeof(ibs_state));
/*
* Note: Since the max count settings may out of range we
* write back the actual used values so that userland can read
* it.
*/
if (ibs_config.fetch_enabled) { if (ibs_config.fetch_enabled) {
val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; val = ibs_config.max_cnt_fetch >> 4;
val = min(val, IBS_FETCH_MAX_CNT);
ibs_config.max_cnt_fetch = val << 4;
val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
val |= IBS_FETCH_ENABLE; val |= IBS_FETCH_ENABLE;
wrmsrl(MSR_AMD64_IBSFETCHCTL, val); wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
} }
if (ibs_config.op_enabled) { if (ibs_config.op_enabled) {
ibs_op_ctl = ibs_config.max_cnt_op >> 4; val = ibs_config.max_cnt_op >> 4;
if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
/* /*
* IbsOpCurCnt not supported. See * IbsOpCurCnt not supported. See
* op_amd_randomize_ibs_op() for details. * op_amd_randomize_ibs_op() for details.
*/ */
ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); val = clamp(val, 0x0081ULL, 0xFF80ULL);
ibs_config.max_cnt_op = val << 4;
} else { } else {
/* /*
* The start value is randomized with a * The start value is randomized with a
...@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void) ...@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void)
* with the half of the randomized range. Also * with the half of the randomized range. Also
* avoid underflows. * avoid underflows.
*/ */
ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, val += IBS_RANDOM_MAXCNT_OFFSET;
IBS_OP_MAX_CNT); if (ibs_caps & IBS_CAPS_OPCNTEXT)
val = min(val, IBS_OP_MAX_CNT_EXT);
else
val = min(val, IBS_OP_MAX_CNT);
ibs_config.max_cnt_op =
(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
}
val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
val |= IBS_OP_ENABLE;
ibs_state.ibs_op_ctl = val;
ibs_state.sample_size = IBS_OP_SIZE;
if (ibs_config.branch_target) {
ibs_state.branch_target = 1;
ibs_state.sample_size++;
} }
if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
ibs_op_ctl |= IBS_OP_CNT_CTL;
ibs_op_ctl |= IBS_OP_ENABLE;
val = op_amd_randomize_ibs_op(ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, val); wrmsrl(MSR_AMD64_IBSOPCTL, val);
} }
} }
...@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset) ...@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset)
static inline int ibs_eilvt_valid(void) static inline int ibs_eilvt_valid(void)
{ {
u64 val;
int offset; int offset;
u64 val;
rdmsrl(MSR_AMD64_IBSCTL, val); rdmsrl(MSR_AMD64_IBSCTL, val);
offset = val & IBSCTL_LVT_OFFSET_MASK;
if (!(val & IBSCTL_LVT_OFFSET_VALID)) { if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
pr_err(FW_BUG "cpu %d, invalid IBS " pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
"interrupt offset %d (MSR%08X=0x%016llx)", smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
smp_processor_id(), offset,
MSR_AMD64_IBSCTL, val);
return 0; return 0;
} }
offset = val & IBSCTL_LVT_OFFSET_MASK; if (!eilvt_is_available(offset)) {
pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
if (eilvt_is_available(offset)) smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
return !0;
pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
"not available (MSR%08X=0x%016llx)",
smp_processor_id(), offset,
MSR_AMD64_IBSCTL, val);
return 0; return 0;
}
return 1;
} }
static inline int get_ibs_offset(void) static inline int get_ibs_offset(void)
...@@ -630,12 +667,11 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) ...@@ -630,12 +667,11 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
/* model specific files */ /* model specific files */
/* setup some reasonable defaults */ /* setup some reasonable defaults */
memset(&ibs_config, 0, sizeof(ibs_config));
ibs_config.max_cnt_fetch = 250000; ibs_config.max_cnt_fetch = 250000;
ibs_config.fetch_enabled = 0;
ibs_config.max_cnt_op = 250000; ibs_config.max_cnt_op = 250000;
ibs_config.op_enabled = 0;
ibs_config.dispatched_ops = 0;
if (ibs_caps & IBS_CAPS_FETCHSAM) {
dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
oprofilefs_create_ulong(sb, dir, "enable", oprofilefs_create_ulong(sb, dir, "enable",
&ibs_config.fetch_enabled); &ibs_config.fetch_enabled);
...@@ -643,7 +679,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) ...@@ -643,7 +679,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
&ibs_config.max_cnt_fetch); &ibs_config.max_cnt_fetch);
oprofilefs_create_ulong(sb, dir, "rand_enable", oprofilefs_create_ulong(sb, dir, "rand_enable",
&ibs_config.rand_en); &ibs_config.rand_en);
}
if (ibs_caps & IBS_CAPS_OPSAM) {
dir = oprofilefs_mkdir(sb, root, "ibs_op"); dir = oprofilefs_mkdir(sb, root, "ibs_op");
oprofilefs_create_ulong(sb, dir, "enable", oprofilefs_create_ulong(sb, dir, "enable",
&ibs_config.op_enabled); &ibs_config.op_enabled);
...@@ -652,6 +690,10 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) ...@@ -652,6 +690,10 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
if (ibs_caps & IBS_CAPS_OPCNT) if (ibs_caps & IBS_CAPS_OPCNT)
oprofilefs_create_ulong(sb, dir, "dispatched_ops", oprofilefs_create_ulong(sb, dir, "dispatched_ops",
&ibs_config.dispatched_ops); &ibs_config.dispatched_ops);
if (ibs_caps & IBS_CAPS_BRNTRGT)
oprofilefs_create_ulong(sb, dir, "branch_target",
&ibs_config.branch_target);
}
return 0; return 0;
} }
......
...@@ -410,7 +410,7 @@ extern void open_softirq(int nr, void (*action)(struct softirq_action *)); ...@@ -410,7 +410,7 @@ extern void open_softirq(int nr, void (*action)(struct softirq_action *));
extern void softirq_init(void); extern void softirq_init(void);
static inline void __raise_softirq_irqoff(unsigned int nr) static inline void __raise_softirq_irqoff(unsigned int nr)
{ {
trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); trace_softirq_raise(nr);
or_softirq_pending(1UL << nr); or_softirq_pending(1UL << nr);
} }
......
...@@ -62,18 +62,6 @@ enum ring_buffer_type { ...@@ -62,18 +62,6 @@ enum ring_buffer_type {
unsigned ring_buffer_event_length(struct ring_buffer_event *event); unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event); void *ring_buffer_event_data(struct ring_buffer_event *event);
/**
* ring_buffer_event_time_delta - return the delta timestamp of the event
* @event: the event to get the delta timestamp of
*
* The delta timestamp is the 27 bit timestamp since the last event.
*/
static inline unsigned
ring_buffer_event_time_delta(struct ring_buffer_event *event)
{
return event->time_delta;
}
/* /*
* ring_buffer_discard_commit will remove an event that has not * ring_buffer_discard_commit will remove an event that has not
* ben committed yet. If this is used, then ring_buffer_unlock_commit * ben committed yet. If this is used, then ring_buffer_unlock_commit
......
...@@ -86,76 +86,62 @@ TRACE_EVENT(irq_handler_exit, ...@@ -86,76 +86,62 @@ TRACE_EVENT(irq_handler_exit,
DECLARE_EVENT_CLASS(softirq, DECLARE_EVENT_CLASS(softirq,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec), TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec), TP_ARGS(vec_nr),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( int, vec ) __field( unsigned int, vec )
), ),
TP_fast_assign( TP_fast_assign(
if (vec) __entry->vec = vec_nr;
__entry->vec = (int)(h - vec);
else
__entry->vec = (int)(long)h;
), ),
TP_printk("vec=%d [action=%s]", __entry->vec, TP_printk("vec=%u [action=%s]", __entry->vec,
show_softirq_name(__entry->vec)) show_softirq_name(__entry->vec))
); );
/** /**
* softirq_entry - called immediately before the softirq handler * softirq_entry - called immediately before the softirq handler
* @h: pointer to struct softirq_action * @vec_nr: softirq vector number
* @vec: pointer to first struct softirq_action in softirq_vec array
* *
* The @h parameter, contains a pointer to the struct softirq_action * When used in combination with the softirq_exit tracepoint
* which has a pointer to the action handler that is called. By subtracting * we can determine the softirq handler runtine.
* the @vec pointer from the @h pointer, we can determine the softirq
* number. Also, when used in combination with the softirq_exit tracepoint
* we can determine the softirq latency.
*/ */
DEFINE_EVENT(softirq, softirq_entry, DEFINE_EVENT(softirq, softirq_entry,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec), TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec) TP_ARGS(vec_nr)
); );
/** /**
* softirq_exit - called immediately after the softirq handler returns * softirq_exit - called immediately after the softirq handler returns
* @h: pointer to struct softirq_action * @vec_nr: softirq vector number
* @vec: pointer to first struct softirq_action in softirq_vec array
* *
* The @h parameter contains a pointer to the struct softirq_action * When used in combination with the softirq_entry tracepoint
* that has handled the softirq. By subtracting the @vec pointer from * we can determine the softirq handler runtine.
* the @h pointer, we can determine the softirq number. Also, when used in
* combination with the softirq_entry tracepoint we can determine the softirq
* latency.
*/ */
DEFINE_EVENT(softirq, softirq_exit, DEFINE_EVENT(softirq, softirq_exit,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec), TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec) TP_ARGS(vec_nr)
); );
/** /**
* softirq_raise - called immediately when a softirq is raised * softirq_raise - called immediately when a softirq is raised
* @h: pointer to struct softirq_action * @vec_nr: softirq vector number
* @vec: pointer to first struct softirq_action in softirq_vec array
* *
* The @h parameter contains a pointer to the softirq vector number which is * When used in combination with the softirq_entry tracepoint
* raised. @vec is NULL and it means @h includes vector number not * we can determine the softirq raise to run latency.
* softirq_action. When used in combination with the softirq_entry tracepoint
* we can determine the softirq raise latency.
*/ */
DEFINE_EVENT(softirq, softirq_raise, DEFINE_EVENT(softirq, softirq_raise,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec), TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec) TP_ARGS(vec_nr)
); );
#endif /* _TRACE_IRQ_H */ #endif /* _TRACE_IRQ_H */
......
...@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; ...@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */ /* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_all_disarmed; static bool kprobes_all_disarmed;
static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ /* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct { static struct {
spinlock_t lock ____cacheline_aligned_in_smp; spinlock_t lock ____cacheline_aligned_in_smp;
...@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) ...@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
} }
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
/* This should be called with kprobe_mutex locked */
static void __kprobes optimize_all_kprobes(void) static void __kprobes optimize_all_kprobes(void)
{ {
struct hlist_head *head; struct hlist_head *head;
...@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void) ...@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void)
return; return;
kprobes_allow_optimization = true; kprobes_allow_optimization = true;
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) { for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i]; head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) hlist_for_each_entry_rcu(p, node, head, hlist)
if (!kprobe_disabled(p)) if (!kprobe_disabled(p))
optimize_kprobe(p); optimize_kprobe(p);
} }
mutex_unlock(&text_mutex);
printk(KERN_INFO "Kprobes globally optimized\n"); printk(KERN_INFO "Kprobes globally optimized\n");
} }
/* This should be called with kprobe_mutex locked */
static void __kprobes unoptimize_all_kprobes(void) static void __kprobes unoptimize_all_kprobes(void)
{ {
struct hlist_head *head; struct hlist_head *head;
......
...@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event) ...@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)
return event->cpu == -1 || event->cpu == smp_processor_id(); return event->cpu == -1 || event->cpu == smp_processor_id();
} }
static int static void
__event_sched_out(struct perf_event *event, event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx, struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx) struct perf_event_context *ctx)
{ {
...@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event, ...@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,
} }
if (event->state != PERF_EVENT_STATE_ACTIVE) if (event->state != PERF_EVENT_STATE_ACTIVE)
return 0; return;
event->state = PERF_EVENT_STATE_INACTIVE; event->state = PERF_EVENT_STATE_INACTIVE;
if (event->pending_disable) { if (event->pending_disable) {
event->pending_disable = 0; event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF; event->state = PERF_EVENT_STATE_OFF;
} }
event->tstamp_stopped = ctx->time;
event->pmu->del(event, 0); event->pmu->del(event, 0);
event->oncpu = -1; event->oncpu = -1;
...@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event, ...@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,
ctx->nr_active--; ctx->nr_active--;
if (event->attr.exclusive || !cpuctx->active_oncpu) if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0; cpuctx->exclusive = 0;
return 1;
}
static void
event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
int ret;
ret = __event_sched_out(event, cpuctx, ctx);
if (ret)
event->tstamp_stopped = ctx->time;
} }
static void static void
...@@ -664,7 +652,7 @@ void perf_event_disable(struct perf_event *event) ...@@ -664,7 +652,7 @@ void perf_event_disable(struct perf_event *event)
} }
static int static int
__event_sched_in(struct perf_event *event, event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx, struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx) struct perf_event_context *ctx)
{ {
...@@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event, ...@@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event,
return -EAGAIN; return -EAGAIN;
} }
event->tstamp_running += ctx->time - event->tstamp_stopped;
if (!is_software_event(event)) if (!is_software_event(event))
cpuctx->active_oncpu++; cpuctx->active_oncpu++;
ctx->nr_active++; ctx->nr_active++;
...@@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event, ...@@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event,
return 0; return 0;
} }
static inline int
event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
int ret = __event_sched_in(event, cpuctx, ctx);
if (ret)
return ret;
event->tstamp_running += ctx->time - event->tstamp_stopped;
return 0;
}
static void
group_commit_event_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
struct perf_event *event;
u64 now = ctx->time;
group_event->tstamp_running += now - group_event->tstamp_stopped;
/*
* Schedule in siblings as one group (if any):
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
event->tstamp_running += now - event->tstamp_stopped;
}
}
static int static int
group_sched_in(struct perf_event *group_event, group_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx, struct perf_cpu_context *cpuctx,
...@@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event, ...@@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event,
{ {
struct perf_event *event, *partial_group = NULL; struct perf_event *event, *partial_group = NULL;
struct pmu *pmu = group_event->pmu; struct pmu *pmu = group_event->pmu;
u64 now = ctx->time;
bool simulate = false;
if (group_event->state == PERF_EVENT_STATE_OFF) if (group_event->state == PERF_EVENT_STATE_OFF)
return 0; return 0;
pmu->start_txn(pmu); pmu->start_txn(pmu);
/* if (event_sched_in(group_event, cpuctx, ctx)) {
* use __event_sched_in() to delay updating tstamp_running
* until the transaction is committed. In case of failure
* we will keep an unmodified tstamp_running which is a
* requirement to get correct timing information
*/
if (__event_sched_in(group_event, cpuctx, ctx)) {
pmu->cancel_txn(pmu); pmu->cancel_txn(pmu);
return -EAGAIN; return -EAGAIN;
} }
...@@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event, ...@@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event,
* Schedule in siblings as one group (if any): * Schedule in siblings as one group (if any):
*/ */
list_for_each_entry(event, &group_event->sibling_list, group_entry) { list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (__event_sched_in(event, cpuctx, ctx)) { if (event_sched_in(event, cpuctx, ctx)) {
partial_group = event; partial_group = event;
goto group_error; goto group_error;
} }
} }
if (!pmu->commit_txn(pmu)) { if (!pmu->commit_txn(pmu))
/* commit tstamp_running */
group_commit_event_sched_in(group_event, cpuctx, ctx);
return 0; return 0;
}
group_error: group_error:
/* /*
* Groups can be scheduled in as one unit only, so undo any * Groups can be scheduled in as one unit only, so undo any
* partial group before returning: * partial group before returning:
* The events up to the failed event are scheduled out normally,
* tstamp_stopped will be updated.
* *
* use __event_sched_out() to avoid updating tstamp_stopped * The failed events and the remaining siblings need to have
* because the event never actually ran * their timings updated as if they had gone thru event_sched_in()
* and event_sched_out(). This is required to get consistent timings
* across the group. This also takes care of the case where the group
* could never be scheduled by ensuring tstamp_stopped is set to mark
* the time the event was actually stopped, such that time delta
* calculation in update_event_times() is correct.
*/ */
list_for_each_entry(event, &group_event->sibling_list, group_entry) { list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (event == partial_group) if (event == partial_group)
break; simulate = true;
__event_sched_out(event, cpuctx, ctx);
if (simulate) {
event->tstamp_running += now - event->tstamp_stopped;
event->tstamp_stopped = now;
} else {
event_sched_out(event, cpuctx, ctx);
}
} }
__event_sched_out(group_event, cpuctx, ctx); event_sched_out(group_event, cpuctx, ctx);
pmu->cancel_txn(pmu); pmu->cancel_txn(pmu);
......
...@@ -229,18 +229,20 @@ asmlinkage void __do_softirq(void) ...@@ -229,18 +229,20 @@ asmlinkage void __do_softirq(void)
do { do {
if (pending & 1) { if (pending & 1) {
unsigned int vec_nr = h - softirq_vec;
int prev_count = preempt_count(); int prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(h - softirq_vec);
trace_softirq_entry(h, softirq_vec); kstat_incr_softirqs_this_cpu(vec_nr);
trace_softirq_entry(vec_nr);
h->action(h); h->action(h);
trace_softirq_exit(h, softirq_vec); trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) { if (unlikely(prev_count != preempt_count())) {
printk(KERN_ERR "huh, entered softirq %td %s %p" printk(KERN_ERR "huh, entered softirq %u %s %p"
"with preempt_count %08x," "with preempt_count %08x,"
" exited with %08x?\n", h - softirq_vec, " exited with %08x?\n", vec_nr,
softirq_to_name[h - softirq_vec], softirq_to_name[vec_nr], h->action,
h->action, prev_count, preempt_count()); prev_count, preempt_count());
preempt_count() = prev_count; preempt_count() = prev_count;
} }
......
...@@ -224,6 +224,9 @@ enum { ...@@ -224,6 +224,9 @@ enum {
RB_LEN_TIME_STAMP = 16, RB_LEN_TIME_STAMP = 16,
}; };
#define skip_time_extend(event) \
((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
static inline int rb_null_event(struct ring_buffer_event *event) static inline int rb_null_event(struct ring_buffer_event *event)
{ {
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
...@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event) ...@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
return length + RB_EVNT_HDR_SIZE; return length + RB_EVNT_HDR_SIZE;
} }
/* inline for ring buffer fast paths */ /*
static unsigned * Return the length of the given event. Will return
* the length of the time extend if the event is a
* time extend.
*/
static inline unsigned
rb_event_length(struct ring_buffer_event *event) rb_event_length(struct ring_buffer_event *event)
{ {
switch (event->type_len) { switch (event->type_len) {
...@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event) ...@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
return 0; return 0;
} }
/*
* Return total length of time extend and data,
* or just the event length for all other events.
*/
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
unsigned len = 0;
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
/* time extends include the data event after it */
len = RB_LEN_TIME_EXTEND;
event = skip_time_extend(event);
}
return len + rb_event_length(event);
}
/** /**
* ring_buffer_event_length - return the length of the event * ring_buffer_event_length - return the length of the event
* @event: the event to get the length of * @event: the event to get the length of
*
* Returns the size of the data load of a data event.
* If the event is something other than a data event, it
* returns the size of the event itself. With the exception
* of a TIME EXTEND, where it still returns the size of the
* data load of the data event after it.
*/ */
unsigned ring_buffer_event_length(struct ring_buffer_event *event) unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{ {
unsigned length = rb_event_length(event); unsigned length;
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
length = rb_event_length(event);
if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
return length; return length;
length -= RB_EVNT_HDR_SIZE; length -= RB_EVNT_HDR_SIZE;
...@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); ...@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
static void * static void *
rb_event_data(struct ring_buffer_event *event) rb_event_data(struct ring_buffer_event *event)
{ {
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */ /* If length is in len field, then array[0] has the data */
if (event->type_len) if (event->type_len)
...@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta) ...@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
/* Max number of timestamps that can fit on a page */
#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
int ring_buffer_print_page_header(struct trace_seq *s) int ring_buffer_print_page_header(struct trace_seq *s)
{ {
struct buffer_data_page field; struct buffer_data_page field;
...@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) ...@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0; iter->head = 0;
} }
/* Slow path, do not inline */
static noinline struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
event->type_len = RINGBUF_TYPE_TIME_EXTEND;
/* Not the first event on the page? */
if (rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}
return skip_time_extend(event);
}
/** /**
* ring_buffer_update_event - update event type and data * ring_buffer_update_event - update event type and data
* @event: the even to update * @event: the even to update
...@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) ...@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
* data field. * data field.
*/ */
static void static void
rb_update_event(struct ring_buffer_event *event, rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned length) struct ring_buffer_event *event, unsigned length,
int add_timestamp, u64 delta)
{ {
event->type_len = type; /* Only a commit updates the timestamp */
if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
switch (type) { delta = 0;
case RINGBUF_TYPE_PADDING: /*
case RINGBUF_TYPE_TIME_EXTEND: * If we need to add a timestamp, then we
case RINGBUF_TYPE_TIME_STAMP: * add it to the start of the resevered space.
break; */
if (unlikely(add_timestamp)) {
event = rb_add_time_stamp(event, delta);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
case 0: event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE; length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length; event->array[0] = length;
else } else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
break;
default:
BUG();
}
} }
/* /*
...@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
local_sub(length, &tail_page->write); local_sub(length, &tail_page->write);
} }
static struct ring_buffer_event * /*
* This is the slow path, force gcc not to inline it.
*/
static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length, unsigned long tail, unsigned long length, unsigned long tail,
struct buffer_page *tail_page, u64 *ts) struct buffer_page *tail_page, u64 ts)
{ {
struct buffer_page *commit_page = cpu_buffer->commit_page; struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer; struct ring_buffer *buffer = cpu_buffer->buffer;
...@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
* Nested commits always have zero deltas, so * Nested commits always have zero deltas, so
* just reread the time stamp * just reread the time stamp
*/ */
*ts = rb_time_stamp(buffer); ts = rb_time_stamp(buffer);
next_page->page->time_stamp = *ts; next_page->page->time_stamp = ts;
} }
out_again: out_again:
...@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_event * static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned long length, u64 *ts) unsigned long length, u64 ts,
u64 delta, int add_timestamp)
{ {
struct buffer_page *tail_page; struct buffer_page *tail_page;
struct ring_buffer_event *event; struct ring_buffer_event *event;
unsigned long tail, write; unsigned long tail, write;
/*
* If the time delta since the last event is too big to
* hold in the time field of the event, then we append a
* TIME EXTEND event ahead of the data event.
*/
if (unlikely(add_timestamp))
length += RB_LEN_TIME_EXTEND;
tail_page = cpu_buffer->tail_page; tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write); write = local_add_return(length, &tail_page->write);
...@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
tail = write - length; tail = write - length;
/* See if we shot pass the end of this buffer page */ /* See if we shot pass the end of this buffer page */
if (write > BUF_PAGE_SIZE) if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail, return rb_move_tail(cpu_buffer, length, tail,
tail_page, ts); tail_page, ts);
...@@ -1951,10 +2019,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1951,10 +2019,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail); event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield); kmemcheck_annotate_bitfield(event, bitfield);
rb_update_event(event, type, length); rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
/* The passed in type is zero for DATA */
if (likely(!type))
local_inc(&tail_page->entries); local_inc(&tail_page->entries);
/* /*
...@@ -1962,7 +2028,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1962,7 +2028,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
* its timestamp. * its timestamp.
*/ */
if (!tail) if (!tail)
tail_page->page->time_stamp = *ts; tail_page->page->time_stamp = ts;
return event; return event;
} }
...@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long addr; unsigned long addr;
new_index = rb_event_index(event); new_index = rb_event_index(event);
old_index = new_index + rb_event_length(event); old_index = new_index + rb_event_ts_length(event);
addr = (unsigned long)event; addr = (unsigned long)event;
addr &= PAGE_MASK; addr &= PAGE_MASK;
...@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
return 0; return 0;
} }
static int
rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
u64 *ts, u64 *delta)
{
struct ring_buffer_event *event;
int ret;
WARN_ONCE(*delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
(unsigned long long)*delta,
(unsigned long long)*ts,
(unsigned long long)cpu_buffer->write_stamp);
/*
* The delta is too big, we to add a
* new timestamp.
*/
event = __rb_reserve_next(cpu_buffer,
RINGBUF_TYPE_TIME_EXTEND,
RB_LEN_TIME_EXTEND,
ts);
if (!event)
return -EBUSY;
if (PTR_ERR(event) == -EAGAIN)
return -EAGAIN;
/* Only a commited time event can update the write stamp */
if (rb_event_is_commit(cpu_buffer, event)) {
/*
* If this is the first on the page, then it was
* updated with the page itself. Try to discard it
* and if we can't just make it zero.
*/
if (rb_event_index(event)) {
event->time_delta = *delta & TS_MASK;
event->array[0] = *delta >> TS_SHIFT;
} else {
/* try to discard, since we do not need this */
if (!rb_try_to_discard(cpu_buffer, event)) {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}
}
cpu_buffer->write_stamp = *ts;
/* let the caller know this was the commit */
ret = 1;
} else {
/* Try to discard the event */
if (!rb_try_to_discard(cpu_buffer, event)) {
/* Darn, this is just wasted space */
event->time_delta = 0;
event->array[0] = 0;
}
ret = 0;
}
*delta = 0;
return ret;
}
static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{ {
local_inc(&cpu_buffer->committing); local_inc(&cpu_buffer->committing);
local_inc(&cpu_buffer->commits); local_inc(&cpu_buffer->commits);
} }
static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{ {
unsigned long commits; unsigned long commits;
...@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, ...@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
unsigned long length) unsigned long length)
{ {
struct ring_buffer_event *event; struct ring_buffer_event *event;
u64 ts, delta = 0; u64 ts, delta;
int commit = 0;
int nr_loops = 0; int nr_loops = 0;
int add_timestamp;
u64 diff;
rb_start_commit(cpu_buffer); rb_start_commit(cpu_buffer);
...@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, ...@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
length = rb_calculate_event_length(length); length = rb_calculate_event_length(length);
again: again:
add_timestamp = 0;
delta = 0;
/* /*
* We allow for interrupts to reenter here and do a trace. * We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop * If one does, it will cause this original code to loop
...@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer, ...@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
goto out_fail; goto out_fail;
ts = rb_time_stamp(cpu_buffer->buffer); ts = rb_time_stamp(cpu_buffer->buffer);
/*
* Only the first commit can update the timestamp.
* Yes there is a race here. If an interrupt comes in
* just after the conditional and it traces too, then it
* will also check the deltas. More than one timestamp may
* also be made. But only the entry that did the actual
* commit will be something other than zero.
*/
if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
rb_page_write(cpu_buffer->tail_page) ==
rb_commit_index(cpu_buffer))) {
u64 diff;
diff = ts - cpu_buffer->write_stamp; diff = ts - cpu_buffer->write_stamp;
/* make sure this diff is calculated here */ /* make sure this diff is calculated here */
barrier(); barrier();
/* Did the write stamp get updated already? */ /* Did the write stamp get updated already? */
if (unlikely(ts < cpu_buffer->write_stamp)) if (likely(ts >= cpu_buffer->write_stamp)) {
goto get_event;
delta = diff; delta = diff;
if (unlikely(test_time_stamp(delta))) { if (unlikely(test_time_stamp(delta))) {
WARN_ONCE(delta > (1ULL << 59),
commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
if (commit == -EBUSY) (unsigned long long)delta,
goto out_fail; (unsigned long long)ts,
(unsigned long long)cpu_buffer->write_stamp);
if (commit == -EAGAIN) add_timestamp = 1;
goto again;
RB_WARN_ON(cpu_buffer, commit < 0);
} }
} }
get_event: event = __rb_reserve_next(cpu_buffer, length, ts,
event = __rb_reserve_next(cpu_buffer, 0, length, &ts); delta, add_timestamp);
if (unlikely(PTR_ERR(event) == -EAGAIN)) if (unlikely(PTR_ERR(event) == -EAGAIN))
goto again; goto again;
if (!event) if (!event)
goto out_fail; goto out_fail;
if (!rb_event_is_commit(cpu_buffer, event))
delta = 0;
event->time_delta = delta;
return event; return event;
out_fail: out_fail:
...@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, ...@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
#define TRACE_RECURSIVE_DEPTH 16 #define TRACE_RECURSIVE_DEPTH 16
static int trace_recursive_lock(void) /* Keep this code out of the fast path cache */
static noinline void trace_recursive_fail(void)
{ {
current->trace_recursion++;
if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
return 0;
/* Disable all tracing before we do anything else */ /* Disable all tracing before we do anything else */
tracing_off_permanent(); tracing_off_permanent();
...@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void) ...@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
in_nmi()); in_nmi());
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
}
static inline int trace_recursive_lock(void)
{
current->trace_recursion++;
if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
return 0;
trace_recursive_fail();
return -1; return -1;
} }
static void trace_recursive_unlock(void) static inline void trace_recursive_unlock(void)
{ {
WARN_ON_ONCE(!current->trace_recursion); WARN_ON_ONCE(!current->trace_recursion);
...@@ -2308,12 +2298,28 @@ static void ...@@ -2308,12 +2298,28 @@ static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event) struct ring_buffer_event *event)
{ {
u64 delta;
/* /*
* The event first in the commit queue updates the * The event first in the commit queue updates the
* time stamp. * time stamp.
*/ */
if (rb_event_is_commit(cpu_buffer, event)) if (rb_event_is_commit(cpu_buffer, event)) {
/*
* A commit event that is first on a page
* updates the write timestamp with the page stamp
*/
if (!rb_event_index(event))
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
delta = event->array[0];
delta <<= TS_SHIFT;
delta += event->time_delta;
cpu_buffer->write_stamp += delta;
} else
cpu_buffer->write_stamp += event->time_delta; cpu_buffer->write_stamp += event->time_delta;
}
} }
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
...@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); ...@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static inline void rb_event_discard(struct ring_buffer_event *event) static inline void rb_event_discard(struct ring_buffer_event *event)
{ {
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
/* array[0] holds the actual length for the discarded event */ /* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING; event->type_len = RINGBUF_TYPE_PADDING;
...@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, ...@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
again: again:
/* /*
* We repeat when a timestamp is encountered. It is possible * We repeat when a time extend is encountered.
* to get multiple timestamps from an interrupt entering just * Since the time extend is always attached to a data event,
* as one timestamp is about to be written, or from discarded * we should never loop more than once.
* commits. The most that we can have is the number on a single page. * (We never hit the following condition more than twice).
*/ */
if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL; return NULL;
reader = rb_get_reader_page(cpu_buffer); reader = rb_get_reader_page(cpu_buffer);
...@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) ...@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return NULL; return NULL;
/* /*
* We repeat when a timestamp is encountered. * We repeat when a time extend is encountered.
* We can get multiple timestamps by nested interrupts or also * Since the time extend is always attached to a data event,
* if filtering is on (discarding commits). Since discarding * we should never loop more than once.
* commits can be frequent we can get a lot of timestamps. * (We never hit the following condition more than twice).
* But we limit them by not adding timestamps if they begin
* at the start of a page.
*/ */
if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL; return NULL;
if (rb_per_cpu_empty(cpu_buffer)) if (rb_per_cpu_empty(cpu_buffer))
...@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, ...@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
if (len > (commit - read)) if (len > (commit - read))
len = (commit - read); len = (commit - read);
size = rb_event_length(event); /* Always keep the time extend and data together */
size = rb_event_ts_length(event);
if (len < size) if (len < size)
goto out_unlock; goto out_unlock;
...@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, ...@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
break; break;
event = rb_reader_event(cpu_buffer); event = rb_reader_event(cpu_buffer);
size = rb_event_length(event); /* Always keep the time extend and data together */
size = rb_event_ts_length(event);
} while (len > size); } while (len > size);
/* update bpage */ /* update bpage */
......
...@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu) ...@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
{ {
struct dentry *d_percpu = tracing_dentry_percpu(); struct dentry *d_percpu = tracing_dentry_percpu();
struct dentry *d_cpu; struct dentry *d_cpu;
/* strlen(cpu) + MAX(log10(cpu)) + '\0' */ char cpu_dir[30]; /* 30 characters should be more than enough */
char cpu_dir[7];
if (cpu > 999 || cpu < 0) snprintf(cpu_dir, 30, "cpu%ld", cpu);
return;
sprintf(cpu_dir, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu); d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) { if (!d_cpu) {
pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
......
...@@ -15,6 +15,23 @@ DESCRIPTION ...@@ -15,6 +15,23 @@ DESCRIPTION
This command displays the symbolic event types which can be selected in the This command displays the symbolic event types which can be selected in the
various perf commands with the -e option. various perf commands with the -e option.
EVENT MODIFIERS
---------------
Events can optionally have a modifer by appending a colon and one or
more modifiers. Modifiers allow the user to restrict when events are
counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier is currently only implemented for
Intel PEBS and can be specified multiple times:
0 - SAMPLE_IP can have arbitrary skid
1 - SAMPLE_IP must have constant skid
2 - SAMPLE_IP requested to have 0 skid
3 - SAMPLE_IP must have 0 skid
The PEBS implementation now supports up to 2.
RAW HARDWARE EVENT DESCRIPTOR RAW HARDWARE EVENT DESCRIPTOR
----------------------------- -----------------------------
Even when an event is not available in a symbolic form within perf right now, Even when an event is not available in a symbolic form within perf right now,
......
...@@ -16,7 +16,9 @@ or ...@@ -16,7 +16,9 @@ or
or or
'perf probe' --list 'perf probe' --list
or or
'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' 'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
or
'perf probe' [options] --vars='PROBEPOINT'
DESCRIPTION DESCRIPTION
----------- -----------
...@@ -31,6 +33,11 @@ OPTIONS ...@@ -31,6 +33,11 @@ OPTIONS
--vmlinux=PATH:: --vmlinux=PATH::
Specify vmlinux path which has debuginfo (Dwarf binary). Specify vmlinux path which has debuginfo (Dwarf binary).
-m::
--module=MODNAME::
Specify module name in which perf-probe searches probe points
or lines.
-s:: -s::
--source=PATH:: --source=PATH::
Specify path to kernel source. Specify path to kernel source.
...@@ -57,6 +64,15 @@ OPTIONS ...@@ -57,6 +64,15 @@ OPTIONS
Show source code lines which can be probed. This needs an argument Show source code lines which can be probed. This needs an argument
which specifies a range of the source code. (see LINE SYNTAX for detail) which specifies a range of the source code. (see LINE SYNTAX for detail)
-V::
--vars=::
Show available local variables at given probe point. The argument
syntax is same as PROBE SYNTAX, but NO ARGs.
--externs::
(Only for --vars) Show external defined variables in addition to local
variables.
-f:: -f::
--force:: --force::
Forcibly add events with existing name. Forcibly add events with existing name.
......
...@@ -83,6 +83,10 @@ OPTIONS ...@@ -83,6 +83,10 @@ OPTIONS
--call-graph:: --call-graph::
Do call-graph (stack chain/backtrace) recording. Do call-graph (stack chain/backtrace) recording.
-q::
--quiet::
Don't print any message, useful for scripting.
-v:: -v::
--verbose:: --verbose::
Be more verbose (show counter open errors, etc). Be more verbose (show counter open errors, etc).
......
...@@ -50,14 +50,17 @@ static struct { ...@@ -50,14 +50,17 @@ static struct {
bool list_events; bool list_events;
bool force_add; bool force_add;
bool show_lines; bool show_lines;
bool show_vars;
bool show_ext_vars;
bool mod_events;
int nevents; int nevents;
struct perf_probe_event events[MAX_PROBES]; struct perf_probe_event events[MAX_PROBES];
struct strlist *dellist; struct strlist *dellist;
struct line_range line_range; struct line_range line_range;
const char *target_module;
int max_probe_points; int max_probe_points;
} params; } params;
/* Parse an event definition. Note that any error must die. */ /* Parse an event definition. Note that any error must die. */
static int parse_probe_event(const char *str) static int parse_probe_event(const char *str)
{ {
...@@ -92,6 +95,7 @@ static int parse_probe_event_argv(int argc, const char **argv) ...@@ -92,6 +95,7 @@ static int parse_probe_event_argv(int argc, const char **argv)
len = 0; len = 0;
for (i = 0; i < argc; i++) for (i = 0; i < argc; i++)
len += sprintf(&buf[len], "%s ", argv[i]); len += sprintf(&buf[len], "%s ", argv[i]);
params.mod_events = true;
ret = parse_probe_event(buf); ret = parse_probe_event(buf);
free(buf); free(buf);
return ret; return ret;
...@@ -100,9 +104,10 @@ static int parse_probe_event_argv(int argc, const char **argv) ...@@ -100,9 +104,10 @@ static int parse_probe_event_argv(int argc, const char **argv)
static int opt_add_probe_event(const struct option *opt __used, static int opt_add_probe_event(const struct option *opt __used,
const char *str, int unset __used) const char *str, int unset __used)
{ {
if (str) if (str) {
params.mod_events = true;
return parse_probe_event(str); return parse_probe_event(str);
else } else
return 0; return 0;
} }
...@@ -110,6 +115,7 @@ static int opt_del_probe_event(const struct option *opt __used, ...@@ -110,6 +115,7 @@ static int opt_del_probe_event(const struct option *opt __used,
const char *str, int unset __used) const char *str, int unset __used)
{ {
if (str) { if (str) {
params.mod_events = true;
if (!params.dellist) if (!params.dellist)
params.dellist = strlist__new(true, NULL); params.dellist = strlist__new(true, NULL);
strlist__add(params.dellist, str); strlist__add(params.dellist, str);
...@@ -130,6 +136,25 @@ static int opt_show_lines(const struct option *opt __used, ...@@ -130,6 +136,25 @@ static int opt_show_lines(const struct option *opt __used,
return ret; return ret;
} }
static int opt_show_vars(const struct option *opt __used,
const char *str, int unset __used)
{
struct perf_probe_event *pev = &params.events[params.nevents];
int ret;
if (!str)
return 0;
ret = parse_probe_event(str);
if (!ret && pev->nargs != 0) {
pr_err(" Error: '--vars' doesn't accept arguments.\n");
return -EINVAL;
}
params.show_vars = true;
return ret;
}
#endif #endif
static const char * const probe_usage[] = { static const char * const probe_usage[] = {
...@@ -138,7 +163,8 @@ static const char * const probe_usage[] = { ...@@ -138,7 +163,8 @@ static const char * const probe_usage[] = {
"perf probe [<options>] --del '[GROUP:]EVENT' ...", "perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list", "perf probe --list",
#ifdef DWARF_SUPPORT #ifdef DWARF_SUPPORT
"perf probe --line 'LINEDESC'", "perf probe [<options>] --line 'LINEDESC'",
"perf probe [<options>] --vars 'PROBEPOINT'",
#endif #endif
NULL NULL
}; };
...@@ -180,10 +206,17 @@ static const struct option options[] = { ...@@ -180,10 +206,17 @@ static const struct option options[] = {
OPT_CALLBACK('L', "line", NULL, OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]", "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines), "Show source code lines.", opt_show_lines),
OPT_CALLBACK('V', "vars", NULL,
"FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
"Show accessible variables on PROBEDEF", opt_show_vars),
OPT_BOOLEAN('\0', "externs", &params.show_ext_vars,
"Show external variables too (with --vars only)"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"), "file", "vmlinux pathname"),
OPT_STRING('s', "source", &symbol_conf.source_prefix, OPT_STRING('s', "source", &symbol_conf.source_prefix,
"directory", "path to kernel source"), "directory", "path to kernel source"),
OPT_STRING('m', "module", &params.target_module,
"modname", "target module name"),
#endif #endif
OPT__DRY_RUN(&probe_event_dry_run), OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &params.max_probe_points, OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
...@@ -217,7 +250,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ...@@ -217,7 +250,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
usage_with_options(probe_usage, options); usage_with_options(probe_usage, options);
if (params.list_events) { if (params.list_events) {
if (params.nevents != 0 || params.dellist) { if (params.mod_events) {
pr_err(" Error: Don't use --list with --add/--del.\n"); pr_err(" Error: Don't use --list with --add/--del.\n");
usage_with_options(probe_usage, options); usage_with_options(probe_usage, options);
} }
...@@ -225,6 +258,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ...@@ -225,6 +258,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
pr_err(" Error: Don't use --list with --line.\n"); pr_err(" Error: Don't use --list with --line.\n");
usage_with_options(probe_usage, options); usage_with_options(probe_usage, options);
} }
if (params.show_vars) {
pr_err(" Error: Don't use --list with --vars.\n");
usage_with_options(probe_usage, options);
}
ret = show_perf_probe_events(); ret = show_perf_probe_events();
if (ret < 0) if (ret < 0)
pr_err(" Error: Failed to show event list. (%d)\n", pr_err(" Error: Failed to show event list. (%d)\n",
...@@ -234,17 +271,35 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ...@@ -234,17 +271,35 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
#ifdef DWARF_SUPPORT #ifdef DWARF_SUPPORT
if (params.show_lines) { if (params.show_lines) {
if (params.nevents != 0 || params.dellist) { if (params.mod_events) {
pr_warning(" Error: Don't use --line with" pr_err(" Error: Don't use --line with"
" --add/--del.\n"); " --add/--del.\n");
usage_with_options(probe_usage, options); usage_with_options(probe_usage, options);
} }
if (params.show_vars) {
pr_err(" Error: Don't use --line with --vars.\n");
usage_with_options(probe_usage, options);
}
ret = show_line_range(&params.line_range); ret = show_line_range(&params.line_range, params.target_module);
if (ret < 0) if (ret < 0)
pr_err(" Error: Failed to show lines. (%d)\n", ret); pr_err(" Error: Failed to show lines. (%d)\n", ret);
return ret; return ret;
} }
if (params.show_vars) {
if (params.mod_events) {
pr_err(" Error: Don't use --vars with"
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
ret = show_available_vars(params.events, params.nevents,
params.max_probe_points,
params.target_module,
params.show_ext_vars);
if (ret < 0)
pr_err(" Error: Failed to show vars. (%d)\n", ret);
return ret;
}
#endif #endif
if (params.dellist) { if (params.dellist) {
...@@ -258,8 +313,9 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ...@@ -258,8 +313,9 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
if (params.nevents) { if (params.nevents) {
ret = add_perf_probe_events(params.events, params.nevents, ret = add_perf_probe_events(params.events, params.nevents,
params.force_add, params.max_probe_points,
params.max_probe_points); params.target_module,
params.force_add);
if (ret < 0) { if (ret < 0) {
pr_err(" Error: Failed to add events. (%d)\n", ret); pr_err(" Error: Failed to add events. (%d)\n", ret);
return ret; return ret;
......
...@@ -353,7 +353,7 @@ static void create_counter(int counter, int cpu) ...@@ -353,7 +353,7 @@ static void create_counter(int counter, int cpu)
} }
if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
perror("Unable to read perf file descriptor\n"); perror("Unable to read perf file descriptor");
exit(-1); exit(-1);
} }
...@@ -626,7 +626,7 @@ static int __cmd_record(int argc, const char **argv) ...@@ -626,7 +626,7 @@ static int __cmd_record(int argc, const char **argv)
nr_cpus = read_cpu_map(cpu_list); nr_cpus = read_cpu_map(cpu_list);
if (nr_cpus < 1) { if (nr_cpus < 1) {
perror("failed to collect number of CPUs\n"); perror("failed to collect number of CPUs");
return -1; return -1;
} }
...@@ -761,6 +761,9 @@ static int __cmd_record(int argc, const char **argv) ...@@ -761,6 +761,9 @@ static int __cmd_record(int argc, const char **argv)
} }
} }
if (quiet)
return 0;
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
/* /*
...@@ -820,6 +823,7 @@ static const struct option options[] = { ...@@ -820,6 +823,7 @@ static const struct option options[] = {
"do call-graph (stack chain/backtrace) recording"), "do call-graph (stack chain/backtrace) recording"),
OPT_INCR('v', "verbose", &verbose, OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"), "be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
OPT_BOOLEAN('s', "stat", &inherit_stat, OPT_BOOLEAN('s', "stat", &inherit_stat,
"per thread counts"), "per thread counts"),
OPT_BOOLEAN('d', "data", &sample_address, OPT_BOOLEAN('d', "data", &sample_address,
......
...@@ -46,9 +46,6 @@ static struct scripting_ops *scripting_ops; ...@@ -46,9 +46,6 @@ static struct scripting_ops *scripting_ops;
static void setup_scripting(void) static void setup_scripting(void)
{ {
/* make sure PERF_EXEC_PATH is set for scripts */
perf_set_argv_exec_path(perf_exec_path());
setup_perl_scripting(); setup_perl_scripting();
setup_python_scripting(); setup_python_scripting();
...@@ -285,7 +282,7 @@ static int parse_scriptname(const struct option *opt __used, ...@@ -285,7 +282,7 @@ static int parse_scriptname(const struct option *opt __used,
script++; script++;
} else { } else {
script = str; script = str;
ext = strchr(script, '.'); ext = strrchr(script, '.');
if (!ext) { if (!ext) {
fprintf(stderr, "invalid script extension"); fprintf(stderr, "invalid script extension");
return -1; return -1;
...@@ -593,6 +590,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) ...@@ -593,6 +590,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
suffix = REPORT_SUFFIX; suffix = REPORT_SUFFIX;
} }
/* make sure PERF_EXEC_PATH is set for scripts */
perf_set_argv_exec_path(perf_exec_path());
if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) { if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) {
char *record_script_path, *report_script_path; char *record_script_path, *report_script_path;
int live_pipe[2]; int live_pipe[2];
...@@ -625,12 +625,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) ...@@ -625,12 +625,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
dup2(live_pipe[1], 1); dup2(live_pipe[1], 1);
close(live_pipe[0]); close(live_pipe[0]);
__argv = malloc(5 * sizeof(const char *)); __argv = malloc(6 * sizeof(const char *));
__argv[0] = "/bin/sh"; __argv[0] = "/bin/sh";
__argv[1] = record_script_path; __argv[1] = record_script_path;
__argv[2] = "-o"; __argv[2] = "-q";
__argv[3] = "-"; __argv[3] = "-o";
__argv[4] = NULL; __argv[4] = "-";
__argv[5] = NULL;
execvp("/bin/sh", (char **)__argv); execvp("/bin/sh", (char **)__argv);
exit(-1); exit(-1);
......
...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then ...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift shift
fi fi
fi fi
perf trace $@ -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $comm perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
...@@ -7,7 +7,7 @@ if [ $# -lt 1 ] ; then ...@@ -7,7 +7,7 @@ if [ $# -lt 1 ] ; then
fi fi
comm=$1 comm=$1
shift shift
perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $comm perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
#!/bin/bash #!/bin/bash
# description: system-wide r/w activity # description: system-wide r/w activity
perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
...@@ -17,7 +17,7 @@ if [ "$n_args" -gt 0 ] ; then ...@@ -17,7 +17,7 @@ if [ "$n_args" -gt 0 ] ; then
interval=$1 interval=$1
shift shift
fi fi
perf trace $@ -s ~/libexec/perf-core/scripts/perl/rwtop.pl $interval perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
#!/bin/bash #!/bin/bash
# description: system-wide min/max/avg wakeup latency # description: system-wide min/max/avg wakeup latency
perf trace $@ -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
#!/bin/bash #!/bin/bash
# description: workqueue stats (ins/exe/create/destroy) # description: workqueue stats (ins/exe/create/destroy)
perf trace $@ -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
......
...@@ -6,6 +6,14 @@ ...@@ -6,6 +6,14 @@
# Public License ("GPL") version 2 as published by the Free Software # Public License ("GPL") version 2 as published by the Free Software
# Foundation. # Foundation.
import errno, os
FUTEX_WAIT = 0
FUTEX_WAKE = 1
FUTEX_PRIVATE_FLAG = 128
FUTEX_CLOCK_REALTIME = 256
FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
NSECS_PER_SEC = 1000000000 NSECS_PER_SEC = 1000000000
def avg(total, n): def avg(total, n):
...@@ -24,5 +32,55 @@ def nsecs_str(nsecs): ...@@ -24,5 +32,55 @@ def nsecs_str(nsecs):
str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)), str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
return str return str
def add_stats(dict, key, value):
if not dict.has_key(key):
dict[key] = (value, value, value, 1)
else:
min, max, avg, count = dict[key]
if value < min:
min = value
if value > max:
max = value
avg = (avg + value) / 2
dict[key] = (min, max, avg, count + 1)
def clear_term(): def clear_term():
print("\x1b[H\x1b[2J") print("\x1b[H\x1b[2J")
audit_package_warned = False
try:
import audit
machine_to_id = {
'x86_64': audit.MACH_86_64,
'alpha' : audit.MACH_ALPHA,
'ia64' : audit.MACH_IA64,
'ppc' : audit.MACH_PPC,
'ppc64' : audit.MACH_PPC64,
's390' : audit.MACH_S390,
's390x' : audit.MACH_S390X,
'i386' : audit.MACH_X86,
'i586' : audit.MACH_X86,
'i686' : audit.MACH_X86,
}
try:
machine_to_id['armeb'] = audit.MACH_ARMEB
except:
pass
machine_id = machine_to_id[os.uname()[4]]
except:
if not audit_package_warned:
audit_package_warned = True
print "Install the audit-libs-python package to get syscall names"
def syscall_name(id):
try:
return audit.audit_syscall_to_name(id, machine_id)
except:
return str(id)
def strerror(nr):
try:
return errno.errorcode[abs(nr)]
except:
return "Unknown %d errno" % nr
...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then ...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift shift
fi fi
fi fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $comm perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
#!/bin/bash
perf record -a -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex $@
#!/bin/bash
# description: futext contention measurement
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
...@@ -2,4 +2,4 @@ ...@@ -2,4 +2,4 @@
# description: display a process of packet and processing time # description: display a process of packet and processing time
# args: [tx] [rx] [dev=] [debug] # args: [tx] [rx] [dev=] [debug]
perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@ perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
#!/bin/bash #!/bin/bash
# description: sched migration overview # description: sched migration overview
perf trace $@ -s ~/libexec/perf-core/scripts/python/sched-migration.py perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
...@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then ...@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
interval=$1 interval=$1
shift shift
fi fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/sctop.py $comm $interval perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then ...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift shift
fi fi
fi fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $comm perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then ...@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift shift
fi fi
fi fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts.py $comm perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
...@@ -13,21 +13,26 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ ...@@ -13,21 +13,26 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
from perf_trace_context import * from perf_trace_context import *
from Core import * from Core import *
from Util import *
usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
for_comm = None for_comm = None
for_pid = None
if len(sys.argv) > 2: if len(sys.argv) > 2:
sys.exit(usage) sys.exit(usage)
if len(sys.argv) > 1: if len(sys.argv) > 1:
try:
for_pid = int(sys.argv[1])
except:
for_comm = sys.argv[1] for_comm = sys.argv[1]
syscalls = autodict() syscalls = autodict()
def trace_begin(): def trace_begin():
pass print "Press control+C to stop and show the summary"
def trace_end(): def trace_end():
print_error_totals() print_error_totals()
...@@ -35,8 +40,8 @@ def trace_end(): ...@@ -35,8 +40,8 @@ def trace_end():
def raw_syscalls__sys_exit(event_name, context, common_cpu, def raw_syscalls__sys_exit(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm, common_secs, common_nsecs, common_pid, common_comm,
id, ret): id, ret):
if for_comm is not None: if (for_comm and common_comm != for_comm) or \
if common_comm != for_comm: (for_pid and common_pid != for_pid ):
return return
if ret < 0: if ret < 0:
...@@ -62,7 +67,7 @@ def print_error_totals(): ...@@ -62,7 +67,7 @@ def print_error_totals():
print "\n%s [%d]\n" % (comm, pid), print "\n%s [%d]\n" % (comm, pid),
id_keys = syscalls[comm][pid].keys() id_keys = syscalls[comm][pid].keys()
for id in id_keys: for id in id_keys:
print " syscall: %-16d\n" % (id), print " syscall: %-16s\n" % syscall_name(id),
ret_keys = syscalls[comm][pid][id].keys() ret_keys = syscalls[comm][pid][id].keys()
for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True): for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True):
print " err = %-20d %10d\n" % (ret, val), print " err = %-20s %10d\n" % (strerror(ret), val),
# futex contention
# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
# Licensed under the terms of the GNU GPL License version 2
#
# Translation of:
#
# http://sourceware.org/systemtap/wiki/WSFutexContention
#
# to perf python scripting.
#
# Measures futex contention
import os, sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from Util import *
process_names = {}
thread_thislock = {}
thread_blocktime = {}
lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
process_names = {} # long-lived pid-to-execname mapping
def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm,
nr, uaddr, op, val, utime, uaddr2, val3):
cmd = op & FUTEX_CMD_MASK
if cmd != FUTEX_WAIT:
return # we don't care about originators of WAKE events
process_names[tid] = comm
thread_thislock[tid] = uaddr
thread_blocktime[tid] = nsecs(s, ns)
def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm,
nr, ret):
if thread_blocktime.has_key(tid):
elapsed = nsecs(s, ns) - thread_blocktime[tid]
add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
del thread_blocktime[tid]
del thread_thislock[tid]
def trace_begin():
print "Press control+C to stop and show the summary"
def trace_end():
for (tid, lock) in lock_waits:
min, max, avg, count = lock_waits[tid, lock]
print "%s[%d] lock %x contended %d times, %d avg ns" % \
(process_names[tid], tid, lock, count, avg)
...@@ -8,10 +8,7 @@ ...@@ -8,10 +8,7 @@
# will be refreshed every [interval] seconds. The default interval is # will be refreshed every [interval] seconds. The default interval is
# 3 seconds. # 3 seconds.
import thread import os, sys, thread, time
import time
import os
import sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace') '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
...@@ -20,7 +17,7 @@ from perf_trace_context import * ...@@ -20,7 +17,7 @@ from perf_trace_context import *
from Core import * from Core import *
from Util import * from Util import *
usage = "perf trace -s syscall-counts.py [comm] [interval]\n"; usage = "perf trace -s sctop.py [comm] [interval]\n";
for_comm = None for_comm = None
default_interval = 3 default_interval = 3
...@@ -71,7 +68,7 @@ def print_syscall_totals(interval): ...@@ -71,7 +68,7 @@ def print_syscall_totals(interval):
for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
reverse = True): reverse = True):
try: try:
print "%-40d %10d\n" % (id, val), print "%-40s %10d\n" % (syscall_name(id), val),
except TypeError: except TypeError:
pass pass
syscalls.clear() syscalls.clear()
......
...@@ -5,29 +5,33 @@ ...@@ -5,29 +5,33 @@
# Displays system-wide system call totals, broken down by syscall. # Displays system-wide system call totals, broken down by syscall.
# If a [comm] arg is specified, only syscalls called by [comm] are displayed. # If a [comm] arg is specified, only syscalls called by [comm] are displayed.
import os import os, sys
import sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace') '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import * from perf_trace_context import *
from Core import * from Core import *
from Util import syscall_name
usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
for_comm = None for_comm = None
for_pid = None
if len(sys.argv) > 2: if len(sys.argv) > 2:
sys.exit(usage) sys.exit(usage)
if len(sys.argv) > 1: if len(sys.argv) > 1:
try:
for_pid = int(sys.argv[1])
except:
for_comm = sys.argv[1] for_comm = sys.argv[1]
syscalls = autodict() syscalls = autodict()
def trace_begin(): def trace_begin():
pass print "Press control+C to stop and show the summary"
def trace_end(): def trace_end():
print_syscall_totals() print_syscall_totals()
...@@ -35,8 +39,9 @@ def trace_end(): ...@@ -35,8 +39,9 @@ def trace_end():
def raw_syscalls__sys_enter(event_name, context, common_cpu, def raw_syscalls__sys_enter(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm, common_secs, common_nsecs, common_pid, common_comm,
id, args): id, args):
if for_comm is not None:
if common_comm != for_comm: if (for_comm and common_comm != for_comm) or \
(for_pid and common_pid != for_pid ):
return return
try: try:
syscalls[common_comm][common_pid][id] += 1 syscalls[common_comm][common_pid][id] += 1
...@@ -61,4 +66,4 @@ def print_syscall_totals(): ...@@ -61,4 +66,4 @@ def print_syscall_totals():
id_keys = syscalls[comm][pid].keys() id_keys = syscalls[comm][pid].keys()
for id, val in sorted(syscalls[comm][pid].iteritems(), \ for id, val in sorted(syscalls[comm][pid].iteritems(), \
key = lambda(k, v): (v, k), reverse = True): key = lambda(k, v): (v, k), reverse = True):
print " %-38d %10d\n" % (id, val), print " %-38s %10d\n" % (syscall_name(id), val),
...@@ -13,6 +13,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ ...@@ -13,6 +13,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
from perf_trace_context import * from perf_trace_context import *
from Core import * from Core import *
from Util import syscall_name
usage = "perf trace -s syscall-counts.py [comm]\n"; usage = "perf trace -s syscall-counts.py [comm]\n";
...@@ -27,7 +28,7 @@ if len(sys.argv) > 1: ...@@ -27,7 +28,7 @@ if len(sys.argv) > 1:
syscalls = autodict() syscalls = autodict()
def trace_begin(): def trace_begin():
pass print "Press control+C to stop and show the summary"
def trace_end(): def trace_end():
print_syscall_totals() print_syscall_totals()
...@@ -55,4 +56,4 @@ def print_syscall_totals(): ...@@ -55,4 +56,4 @@ def print_syscall_totals():
for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
reverse = True): reverse = True):
print "%-40d %10d\n" % (id, val), print "%-40s %10d\n" % (syscall_name(id), val),
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
#include "debug.h" #include "debug.h"
#include "util.h" #include "util.h"
int verbose = 0; int verbose;
bool dump_trace = false; bool dump_trace = false, quiet = false;
int eprintf(int level, const char *fmt, ...) int eprintf(int level, const char *fmt, ...)
{ {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "event.h" #include "event.h"
extern int verbose; extern int verbose;
extern bool dump_trace; extern bool quiet, dump_trace;
int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
void trace_event(event_t *event); void trace_event(event_t *event);
......
...@@ -215,6 +215,16 @@ struct symbol *map_groups__find_function_by_name(struct map_groups *self, ...@@ -215,6 +215,16 @@ struct symbol *map_groups__find_function_by_name(struct map_groups *self,
return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter); return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter);
} }
static inline
struct symbol *machine__find_kernel_function_by_name(struct machine *self,
const char *name,
struct map **mapp,
symbol_filter_t filter)
{
return map_groups__find_function_by_name(&self->kmaps, name, mapp,
filter);
}
int map_groups__fixup_overlappings(struct map_groups *self, struct map *map, int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
int verbose, FILE *fp); int verbose, FILE *fp);
......
...@@ -74,10 +74,9 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) ...@@ -74,10 +74,9 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
static char *synthesize_perf_probe_point(struct perf_probe_point *pp); static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
static struct machine machine; static struct machine machine;
/* Initialize symbol maps and path of vmlinux */ /* Initialize symbol maps and path of vmlinux/modules */
static int init_vmlinux(void) static int init_vmlinux(void)
{ {
struct dso *kernel;
int ret; int ret;
symbol_conf.sort_by_name = true; symbol_conf.sort_by_name = true;
...@@ -91,33 +90,61 @@ static int init_vmlinux(void) ...@@ -91,33 +90,61 @@ static int init_vmlinux(void)
goto out; goto out;
} }
ret = machine__init(&machine, "/", 0); ret = machine__init(&machine, "", HOST_KERNEL_ID);
if (ret < 0) if (ret < 0)
goto out; goto out;
kernel = dso__new_kernel(symbol_conf.vmlinux_name); if (machine__create_kernel_maps(&machine) < 0) {
if (kernel == NULL) pr_debug("machine__create_kernel_maps ");
die("Failed to create kernel dso."); goto out;
}
ret = __machine__create_kernel_maps(&machine, kernel);
if (ret < 0)
pr_debug("Failed to create kernel maps.\n");
out: out:
if (ret < 0) if (ret < 0)
pr_warning("Failed to init vmlinux path.\n"); pr_warning("Failed to init vmlinux path.\n");
return ret; return ret;
} }
#ifdef DWARF_SUPPORT static struct symbol *__find_kernel_function_by_name(const char *name,
static int open_vmlinux(void) struct map **mapp)
{
return machine__find_kernel_function_by_name(&machine, name, mapp,
NULL);
}
const char *kernel_get_module_path(const char *module)
{ {
if (map__load(machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) { struct dso *dso;
if (module) {
list_for_each_entry(dso, &machine.kernel_dsos, node) {
if (strncmp(dso->short_name + 1, module,
dso->short_name_len - 2) == 0)
goto found;
}
pr_debug("Failed to find module %s.\n", module);
return NULL;
} else {
dso = machine.vmlinux_maps[MAP__FUNCTION]->dso;
if (dso__load_vmlinux_path(dso,
machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
pr_debug("Failed to load kernel map.\n"); pr_debug("Failed to load kernel map.\n");
return -EINVAL; return NULL;
}
}
found:
return dso->long_name;
}
#ifdef DWARF_SUPPORT
static int open_vmlinux(const char *module)
{
const char *path = kernel_get_module_path(module);
if (!path) {
pr_err("Failed to find path of %s module", module ?: "kernel");
return -ENOENT;
} }
pr_debug("Try to open %s\n", machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name); pr_debug("Try to open %s\n", path);
return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY); return open(path, O_RDONLY);
} }
/* /*
...@@ -128,17 +155,16 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, ...@@ -128,17 +155,16 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
struct perf_probe_point *pp) struct perf_probe_point *pp)
{ {
struct symbol *sym; struct symbol *sym;
int fd, ret = -ENOENT; struct map *map;
u64 addr;
int ret = -ENOENT;
sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION], sym = __find_kernel_function_by_name(tp->symbol, &map);
tp->symbol, NULL);
if (sym) { if (sym) {
fd = open_vmlinux(); addr = map->unmap_ip(map, sym->start + tp->offset);
if (fd >= 0) { pr_debug("try to find %s+%ld@%llx\n", tp->symbol,
ret = find_perf_probe_point(fd, tp->offset, addr);
sym->start + tp->offset, pp); ret = find_perf_probe_point((unsigned long)addr, pp);
close(fd);
}
} }
if (ret <= 0) { if (ret <= 0) {
pr_debug("Failed to find corresponding probes from " pr_debug("Failed to find corresponding probes from "
...@@ -156,12 +182,12 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, ...@@ -156,12 +182,12 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
/* Try to find perf_probe_event with debuginfo */ /* Try to find perf_probe_event with debuginfo */
static int try_to_find_probe_trace_events(struct perf_probe_event *pev, static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs, struct probe_trace_event **tevs,
int max_tevs) int max_tevs, const char *module)
{ {
bool need_dwarf = perf_probe_event_need_dwarf(pev); bool need_dwarf = perf_probe_event_need_dwarf(pev);
int fd, ntevs; int fd, ntevs;
fd = open_vmlinux(); fd = open_vmlinux(module);
if (fd < 0) { if (fd < 0) {
if (need_dwarf) { if (need_dwarf) {
pr_warning("Failed to open debuginfo file.\n"); pr_warning("Failed to open debuginfo file.\n");
...@@ -300,7 +326,7 @@ static int show_one_line(FILE *fp, int l, bool skip, bool show_num) ...@@ -300,7 +326,7 @@ static int show_one_line(FILE *fp, int l, bool skip, bool show_num)
* Show line-range always requires debuginfo to find source file and * Show line-range always requires debuginfo to find source file and
* line number. * line number.
*/ */
int show_line_range(struct line_range *lr) int show_line_range(struct line_range *lr, const char *module)
{ {
int l = 1; int l = 1;
struct line_node *ln; struct line_node *ln;
...@@ -313,7 +339,7 @@ int show_line_range(struct line_range *lr) ...@@ -313,7 +339,7 @@ int show_line_range(struct line_range *lr)
if (ret < 0) if (ret < 0)
return ret; return ret;
fd = open_vmlinux(); fd = open_vmlinux(module);
if (fd < 0) { if (fd < 0) {
pr_warning("Failed to open debuginfo file.\n"); pr_warning("Failed to open debuginfo file.\n");
return fd; return fd;
...@@ -378,11 +404,84 @@ int show_line_range(struct line_range *lr) ...@@ -378,11 +404,84 @@ int show_line_range(struct line_range *lr)
return ret; return ret;
} }
static int show_available_vars_at(int fd, struct perf_probe_event *pev,
int max_vls, bool externs)
{
char *buf;
int ret, i;
struct str_node *node;
struct variable_list *vls = NULL, *vl;
buf = synthesize_perf_probe_point(&pev->point);
if (!buf)
return -EINVAL;
pr_debug("Searching variables at %s\n", buf);
ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
if (ret > 0) {
/* Some variables were found */
fprintf(stdout, "Available variables at %s\n", buf);
for (i = 0; i < ret; i++) {
vl = &vls[i];
/*
* A probe point might be converted to
* several trace points.
*/
fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
vl->point.offset);
free(vl->point.symbol);
if (vl->vars) {
strlist__for_each(node, vl->vars)
fprintf(stdout, "\t\t%s\n", node->s);
strlist__delete(vl->vars);
} else
fprintf(stdout, "(No variables)\n");
}
free(vls);
} else
pr_err("Failed to find variables at %s (%d)\n", buf, ret);
free(buf);
return ret;
}
/* Show available variables on given probe point */
int show_available_vars(struct perf_probe_event *pevs, int npevs,
int max_vls, const char *module, bool externs)
{
int i, fd, ret = 0;
ret = init_vmlinux();
if (ret < 0)
return ret;
fd = open_vmlinux(module);
if (fd < 0) {
pr_warning("Failed to open debuginfo file.\n");
return fd;
}
setup_pager();
for (i = 0; i < npevs && ret >= 0; i++)
ret = show_available_vars_at(fd, &pevs[i], max_vls, externs);
close(fd);
return ret;
}
#else /* !DWARF_SUPPORT */ #else /* !DWARF_SUPPORT */
static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
struct perf_probe_point *pp) struct perf_probe_point *pp)
{ {
struct symbol *sym;
sym = __find_kernel_function_by_name(tp->symbol, NULL);
if (!sym) {
pr_err("Failed to find symbol %s in kernel.\n", tp->symbol);
return -ENOENT;
}
pp->function = strdup(tp->symbol); pp->function = strdup(tp->symbol);
if (pp->function == NULL) if (pp->function == NULL)
return -ENOMEM; return -ENOMEM;
...@@ -394,7 +493,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, ...@@ -394,7 +493,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
static int try_to_find_probe_trace_events(struct perf_probe_event *pev, static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs __unused, struct probe_trace_event **tevs __unused,
int max_tevs __unused) int max_tevs __unused, const char *mod __unused)
{ {
if (perf_probe_event_need_dwarf(pev)) { if (perf_probe_event_need_dwarf(pev)) {
pr_warning("Debuginfo-analysis is not supported.\n"); pr_warning("Debuginfo-analysis is not supported.\n");
...@@ -403,12 +502,19 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, ...@@ -403,12 +502,19 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
return 0; return 0;
} }
int show_line_range(struct line_range *lr __unused) int show_line_range(struct line_range *lr __unused, const char *module __unused)
{ {
pr_warning("Debuginfo-analysis is not supported.\n"); pr_warning("Debuginfo-analysis is not supported.\n");
return -ENOSYS; return -ENOSYS;
} }
/* Fallback when perf is built without DWARF_SUPPORT: always fails */
int show_available_vars(struct perf_probe_event *pevs __unused,
			int npevs __unused, int max_vls __unused,
			const char *module __unused, bool externs __unused)
{
	pr_warning("Debuginfo-analysis is not supported.\n");
	return -ENOSYS;
}
#endif #endif
int parse_line_range_desc(const char *arg, struct line_range *lr) int parse_line_range_desc(const char *arg, struct line_range *lr)
...@@ -1516,14 +1622,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, ...@@ -1516,14 +1622,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
static int convert_to_probe_trace_events(struct perf_probe_event *pev, static int convert_to_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs, struct probe_trace_event **tevs,
int max_tevs) int max_tevs, const char *module)
{ {
struct symbol *sym; struct symbol *sym;
int ret = 0, i; int ret = 0, i;
struct probe_trace_event *tev; struct probe_trace_event *tev;
/* Convert perf_probe_event with debuginfo */ /* Convert perf_probe_event with debuginfo */
ret = try_to_find_probe_trace_events(pev, tevs, max_tevs); ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
if (ret != 0) if (ret != 0)
return ret; return ret;
...@@ -1572,8 +1678,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, ...@@ -1572,8 +1678,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
} }
/* Currently just checking function name from symbol map */ /* Currently just checking function name from symbol map */
sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION], sym = __find_kernel_function_by_name(tev->point.symbol, NULL);
tev->point.symbol, NULL);
if (!sym) { if (!sym) {
pr_warning("Kernel symbol \'%s\' not found.\n", pr_warning("Kernel symbol \'%s\' not found.\n",
tev->point.symbol); tev->point.symbol);
...@@ -1596,7 +1701,7 @@ struct __event_package { ...@@ -1596,7 +1701,7 @@ struct __event_package {
}; };
int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
bool force_add, int max_tevs) int max_tevs, const char *module, bool force_add)
{ {
int i, j, ret; int i, j, ret;
struct __event_package *pkgs; struct __event_package *pkgs;
...@@ -1617,7 +1722,9 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, ...@@ -1617,7 +1722,9 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
pkgs[i].pev = &pevs[i]; pkgs[i].pev = &pevs[i];
/* Convert with or without debuginfo */ /* Convert with or without debuginfo */
ret = convert_to_probe_trace_events(pkgs[i].pev, ret = convert_to_probe_trace_events(pkgs[i].pev,
&pkgs[i].tevs, max_tevs); &pkgs[i].tevs,
max_tevs,
module);
if (ret < 0) if (ret < 0)
goto end; goto end;
pkgs[i].ntevs = ret; pkgs[i].ntevs = ret;
......
...@@ -90,6 +90,12 @@ struct line_range { ...@@ -90,6 +90,12 @@ struct line_range {
struct list_head line_list; /* Visible lines */ struct list_head line_list; /* Visible lines */
}; };
/* List of variables available at one probe point */
struct variable_list {
	struct probe_trace_point point;	/* Actual probepoint */
	struct strlist *vars;		/* Available variables (NULL when none) */
};
/* Command string to events */ /* Command string to events */
extern int parse_perf_probe_command(const char *cmd, extern int parse_perf_probe_command(const char *cmd,
struct perf_probe_event *pev); struct perf_probe_event *pev);
...@@ -109,12 +115,18 @@ extern void clear_perf_probe_event(struct perf_probe_event *pev); ...@@ -109,12 +115,18 @@ extern void clear_perf_probe_event(struct perf_probe_event *pev);
/* Command string to line-range */ /* Command string to line-range */
extern int parse_line_range_desc(const char *cmd, struct line_range *lr); extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
/* Internal use: Return kernel/module path */
extern const char *kernel_get_module_path(const char *module);
extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
bool force_add, int max_probe_points); int max_probe_points, const char *module,
bool force_add);
extern int del_perf_probe_events(struct strlist *dellist); extern int del_perf_probe_events(struct strlist *dellist);
extern int show_perf_probe_events(void); extern int show_perf_probe_events(void);
extern int show_line_range(struct line_range *lr); extern int show_line_range(struct line_range *lr, const char *module);
extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
int max_probe_points, const char *module,
bool externs);
/* Maximum index number of event-name postfix */ /* Maximum index number of event-name postfix */
......
...@@ -116,6 +116,101 @@ static void line_list__free(struct list_head *head) ...@@ -116,6 +116,101 @@ static void line_list__free(struct list_head *head)
} }
} }
/* Dwarf FL wrappers */

/*
 * dwfl find_elf callback: prefer the path reported by
 * kernel_get_module_path(); fall back to the stock elfutils lookup.
 * Returns an open fd with *file_name set, or whatever the standard
 * method returns.
 */
static int __linux_kernel_find_elf(Dwfl_Module *mod,
				   void **userdata,
				   const char *module_name,
				   Dwarf_Addr base,
				   char **file_name, Elf **elfp)
{
	int fd;
	const char *path = kernel_get_module_path(module_name);

	if (path) {
		fd = open(path, O_RDONLY);
		if (fd >= 0) {
			*file_name = strdup(path);
			if (*file_name)
				return fd;
			/* strdup failed: don't leak the fd, fall back */
			close(fd);
		}
	}
	/* If failed, try to call standard method */
	return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
					  file_name, elfp);
}
/* Handed to dwfl_standard_find_debuginfo as the search path */
static char *debuginfo_path;	/* Currently dummy */

/* Callback table for reading DWARF from an offline (on-disk) image */
static const Dwfl_Callbacks offline_callbacks = {
	.find_debuginfo = dwfl_standard_find_debuginfo,
	.debuginfo_path = &debuginfo_path,

	.section_address = dwfl_offline_section_address,

	/* We use this table for core files too.  */
	.find_elf = dwfl_build_id_find_elf,
};

/* Callback table for reading DWARF from the live kernel and modules */
static const Dwfl_Callbacks kernel_callbacks = {
	.find_debuginfo = dwfl_standard_find_debuginfo,
	.debuginfo_path = &debuginfo_path,

	.find_elf = __linux_kernel_find_elf,
	.section_address = dwfl_linux_kernel_module_section_address,
};
/* Get a Dwarf from offline image */
static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
{
	Dwfl_Module *mod;
	Dwarf *dbg = NULL;

	if (!dwflp)
		return NULL;

	*dwflp = dwfl_begin(&offline_callbacks);
	if (!*dwflp)
		return NULL;

	mod = dwfl_report_offline(*dwflp, "", "", fd);
	if (mod)
		dbg = dwfl_module_getdwarf(mod, bias);

	/* On any failure, tear down the session and hand back NULL */
	if (!dbg) {
		dwfl_end(*dwflp);
		*dwflp = NULL;
	}
	return dbg;
}
/* Get a Dwarf from live kernel image */
static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
					  Dwarf_Addr *bias)
{
	Dwarf *dbg = NULL;

	if (!dwflp)
		return NULL;

	*dwflp = dwfl_begin(&kernel_callbacks);
	if (*dwflp) {
		/* Report kernel and module images; results don't matter here */
		dwfl_linux_kernel_report_kernel(*dwflp);
		dwfl_linux_kernel_report_modules(*dwflp);

		/* The real check: can we get a dwarf covering addr? */
		dbg = dwfl_addrdwarf(*dwflp, addr, bias);
		if (!dbg) {
			dwfl_end(*dwflp);
			*dwflp = NULL;
		}
	}
	return dbg;
}
/* Dwarf wrappers */ /* Dwarf wrappers */
/* Find the realpath of the target file. */ /* Find the realpath of the target file. */
...@@ -160,26 +255,44 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) ...@@ -160,26 +255,44 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
return name ? (strcmp(tname, name) == 0) : false; return name ? (strcmp(tname, name) == 0) : false;
} }
/* Get type die, but skip qualifiers and typedef */ /* Get type die */
static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{ {
Dwarf_Attribute attr; Dwarf_Attribute attr;
int tag;
do { if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
if (dwarf_attr(vr_die, DW_AT_type, &attr) == NULL || dwarf_formref_die(&attr, die_mem))
dwarf_formref_die(&attr, die_mem) == NULL) return die_mem;
else
return NULL; return NULL;
}
tag = dwarf_tag(die_mem); /* Get a type die, but skip qualifiers */
vr_die = die_mem; static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
int tag;
do {
vr_die = die_get_type(vr_die, die_mem);
if (!vr_die)
break;
tag = dwarf_tag(vr_die);
} while (tag == DW_TAG_const_type || } while (tag == DW_TAG_const_type ||
tag == DW_TAG_restrict_type || tag == DW_TAG_restrict_type ||
tag == DW_TAG_volatile_type || tag == DW_TAG_volatile_type ||
tag == DW_TAG_shared_type || tag == DW_TAG_shared_type);
tag == DW_TAG_typedef);
return die_mem; return vr_die;
}
/* Get a type die, but skip qualifiers and typedef */
static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
do {
vr_die = __die_get_real_type(vr_die, die_mem);
} while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
return vr_die;
} }
static bool die_is_signed_type(Dwarf_Die *tp_die) static bool die_is_signed_type(Dwarf_Die *tp_die)
...@@ -320,25 +433,35 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, ...@@ -320,25 +433,35 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
} }
/* Search key for variable lookup: name plus the address its scope must cover */
struct __find_variable_param {
	const char *name;	/* Variable name to match */
	Dwarf_Addr addr;	/* Address the enclosing scope must contain */
};
static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data) static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
{ {
const char *name = data; struct __find_variable_param *fvp = data;
int tag; int tag;
tag = dwarf_tag(die_mem); tag = dwarf_tag(die_mem);
if ((tag == DW_TAG_formal_parameter || if ((tag == DW_TAG_formal_parameter ||
tag == DW_TAG_variable) && tag == DW_TAG_variable) &&
die_compare_name(die_mem, name)) die_compare_name(die_mem, fvp->name))
return DIE_FIND_CB_FOUND; return DIE_FIND_CB_FOUND;
if (dwarf_haspc(die_mem, fvp->addr))
return DIE_FIND_CB_CONTINUE; return DIE_FIND_CB_CONTINUE;
else
return DIE_FIND_CB_SIBLING;
} }
/* Find a variable called 'name' */ /* Find a variable called 'name' at given address */
static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name, static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
Dwarf_Die *die_mem) Dwarf_Addr addr, Dwarf_Die *die_mem)
{ {
return die_find_child(sp_die, __die_find_variable_cb, (void *)name, struct __find_variable_param fvp = { .name = name, .addr = addr};
return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
die_mem); die_mem);
} }
...@@ -361,6 +484,60 @@ static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, ...@@ -361,6 +484,60 @@ static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
die_mem); die_mem);
} }
/*
 * Get the type name of given variable DIE into buf.
 * Pointer and array types are handled by recursing into the referenced
 * type and appending "*"; struct/union names get a keyword prefix;
 * function pointers are rendered as "(function_type)".
 * Returns bytes written, -E2BIG on truncation, -ENOENT if the type
 * cannot be resolved or has no name.
 */
static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
{
	Dwarf_Die type;
	int tag, ret, ret2;
	const char *tmp = "";

	if (__die_get_real_type(vr_die, &type) == NULL)
		return -ENOENT;

	tag = dwarf_tag(&type);
	if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
		tmp = "*";	/* suffix added after the recursive call below */
	else if (tag == DW_TAG_subroutine_type) {
		/* Function pointer */
		ret = snprintf(buf, len, "(function_type)");
		return (ret >= len) ? -E2BIG : ret;
	} else {
		if (!dwarf_diename(&type))
			return -ENOENT;
		if (tag == DW_TAG_union_type)
			tmp = "union ";
		else if (tag == DW_TAG_structure_type)
			tmp = "struct ";
		/* Write a base name */
		ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
		return (ret >= len) ? -E2BIG : ret;
	}
	/* Only reached for pointer/array: name the pointee, then append "*" */
	ret = die_get_typename(&type, buf, len);
	if (ret > 0) {
		ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
	}
	return ret;
}
/* Get the name and type of given variable DIE, stored as "type\tname" */
static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
{
	int pos, n;

	/* Render the type first; use a placeholder if that fails */
	pos = die_get_typename(vr_die, buf, len);
	if (pos < 0) {
		pr_debug("Failed to get type, make it unknown.\n");
		pos = snprintf(buf, len, "(unknown_type)");
	}

	/* Append a tab and the variable's own name */
	if (pos > 0) {
		n = snprintf(buf + pos, len - pos, "\t%s",
			     dwarf_diename(vr_die));
		pos = (n >= len - pos) ? -E2BIG : pos + n;
	}
	return pos;
}
/* /*
* Probe finder related functions * Probe finder related functions
*/ */
...@@ -374,8 +551,13 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) ...@@ -374,8 +551,13 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
return ref; return ref;
} }
/* Show a location */ /*
static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf) * Convert a location into trace_arg.
* If tvar == NULL, this just checks variable can be converted.
*/
static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
Dwarf_Op *fb_ops,
struct probe_trace_arg *tvar)
{ {
Dwarf_Attribute attr; Dwarf_Attribute attr;
Dwarf_Op *op; Dwarf_Op *op;
...@@ -384,20 +566,23 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf) ...@@ -384,20 +566,23 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
Dwarf_Word offs = 0; Dwarf_Word offs = 0;
bool ref = false; bool ref = false;
const char *regs; const char *regs;
struct probe_trace_arg *tvar = pf->tvar;
int ret; int ret;
if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
goto static_var;
/* TODO: handle more than 1 exprs */ /* TODO: handle more than 1 exprs */
if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL || if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
dwarf_getlocation_addr(&attr, pf->addr, &op, &nops, 1) <= 0 || dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 ||
nops == 0) { nops == 0) {
/* TODO: Support const_value */ /* TODO: Support const_value */
pr_err("Failed to find the location of %s at this address.\n"
" Perhaps, it has been optimized out.\n", pf->pvar->var);
return -ENOENT; return -ENOENT;
} }
if (op->atom == DW_OP_addr) { if (op->atom == DW_OP_addr) {
static_var:
if (!tvar)
return 0;
/* Static variables on memory (not stack), make @varname */ /* Static variables on memory (not stack), make @varname */
ret = strlen(dwarf_diename(vr_die)); ret = strlen(dwarf_diename(vr_die));
tvar->value = zalloc(ret + 2); tvar->value = zalloc(ret + 2);
...@@ -412,14 +597,11 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf) ...@@ -412,14 +597,11 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
/* If this is based on frame buffer, set the offset */ /* If this is based on frame buffer, set the offset */
if (op->atom == DW_OP_fbreg) { if (op->atom == DW_OP_fbreg) {
if (pf->fb_ops == NULL) { if (fb_ops == NULL)
pr_warning("The attribute of frame base is not "
"supported.\n");
return -ENOTSUP; return -ENOTSUP;
}
ref = true; ref = true;
offs = op->number; offs = op->number;
op = &pf->fb_ops[0]; op = &fb_ops[0];
} }
if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) { if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
...@@ -435,13 +617,18 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf) ...@@ -435,13 +617,18 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
} else if (op->atom == DW_OP_regx) { } else if (op->atom == DW_OP_regx) {
regn = op->number; regn = op->number;
} else { } else {
pr_warning("DW_OP %x is not supported.\n", op->atom); pr_debug("DW_OP %x is not supported.\n", op->atom);
return -ENOTSUP; return -ENOTSUP;
} }
if (!tvar)
return 0;
regs = get_arch_regstr(regn); regs = get_arch_regstr(regn);
if (!regs) { if (!regs) {
pr_warning("Mapping for DWARF register number %u missing on this architecture.", regn); /* This should be a bug in DWARF or this tool */
pr_warning("Mapping for DWARF register number %u "
"missing on this architecture.", regn);
return -ERANGE; return -ERANGE;
} }
...@@ -666,8 +853,14 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) ...@@ -666,8 +853,14 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
pr_debug("Converting variable %s into trace event.\n", pr_debug("Converting variable %s into trace event.\n",
dwarf_diename(vr_die)); dwarf_diename(vr_die));
ret = convert_variable_location(vr_die, pf); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
if (ret == 0 && pf->pvar->field) { pf->tvar);
if (ret == -ENOENT)
pr_err("Failed to find the location of %s at this address.\n"
" Perhaps, it has been optimized out.\n", pf->pvar->var);
else if (ret == -ENOTSUP)
pr_err("Sorry, we don't support this variable location yet.\n");
else if (pf->pvar->field) {
ret = convert_variable_fields(vr_die, pf->pvar->var, ret = convert_variable_fields(vr_die, pf->pvar->var,
pf->pvar->field, &pf->tvar->ref, pf->pvar->field, &pf->tvar->ref,
&die_mem); &die_mem);
...@@ -722,56 +915,39 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) ...@@ -722,56 +915,39 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
pr_debug("Searching '%s' variable in context.\n", pr_debug("Searching '%s' variable in context.\n",
pf->pvar->var); pf->pvar->var);
/* Search child die for local variables and parameters. */ /* Search child die for local variables and parameters. */
if (die_find_variable(sp_die, pf->pvar->var, &vr_die)) if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die))
ret = convert_variable(&vr_die, pf); ret = convert_variable(&vr_die, pf);
else { else {
/* Search upper class */ /* Search upper class */
nscopes = dwarf_getscopes_die(sp_die, &scopes); nscopes = dwarf_getscopes_die(sp_die, &scopes);
if (nscopes > 0) { while (nscopes-- > 1) {
ret = dwarf_getscopevar(scopes, nscopes, pf->pvar->var, pr_debug("Searching variables in %s\n",
0, NULL, 0, 0, &vr_die); dwarf_diename(&scopes[nscopes]));
if (ret >= 0) /* We should check this scope, so give dummy address */
if (die_find_variable_at(&scopes[nscopes],
pf->pvar->var, 0,
&vr_die)) {
ret = convert_variable(&vr_die, pf); ret = convert_variable(&vr_die, pf);
else goto found;
ret = -ENOENT; }
}
if (scopes)
free(scopes); free(scopes);
} else
ret = -ENOENT; ret = -ENOENT;
} }
found:
if (ret < 0) if (ret < 0)
pr_warning("Failed to find '%s' in this function.\n", pr_warning("Failed to find '%s' in this function.\n",
pf->pvar->var); pf->pvar->var);
return ret; return ret;
} }
/* Show a probe point to output buffer */ /* Convert subprogram DIE to trace point */
static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
bool retprobe, struct probe_trace_point *tp)
{ {
struct probe_trace_event *tev;
Dwarf_Addr eaddr; Dwarf_Addr eaddr;
Dwarf_Die die_mem;
const char *name; const char *name;
int ret, i;
Dwarf_Attribute fb_attr;
size_t nops;
if (pf->ntevs == pf->max_tevs) {
pr_warning("Too many( > %d) probe point found.\n",
pf->max_tevs);
return -ERANGE;
}
tev = &pf->tevs[pf->ntevs++];
/* If no real subprogram, find a real one */
if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
sp_die = die_find_real_subprogram(&pf->cu_die,
pf->addr, &die_mem);
if (!sp_die) {
pr_warning("Failed to find probe point in any "
"functions.\n");
return -ENOENT;
}
}
/* Copy the name of probe point */ /* Copy the name of probe point */
name = dwarf_diename(sp_die); name = dwarf_diename(sp_die);
...@@ -781,26 +957,45 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) ...@@ -781,26 +957,45 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
dwarf_diename(sp_die)); dwarf_diename(sp_die));
return -ENOENT; return -ENOENT;
} }
tev->point.symbol = strdup(name); tp->symbol = strdup(name);
if (tev->point.symbol == NULL) if (tp->symbol == NULL)
return -ENOMEM; return -ENOMEM;
tev->point.offset = (unsigned long)(pf->addr - eaddr); tp->offset = (unsigned long)(paddr - eaddr);
} else } else
/* This function has no name. */ /* This function has no name. */
tev->point.offset = (unsigned long)pf->addr; tp->offset = (unsigned long)paddr;
/* Return probe must be on the head of a subprogram */ /* Return probe must be on the head of a subprogram */
if (pf->pev->point.retprobe) { if (retprobe) {
if (tev->point.offset != 0) { if (eaddr != paddr) {
pr_warning("Return probe must be on the head of" pr_warning("Return probe must be on the head of"
" a real function\n"); " a real function\n");
return -EINVAL; return -EINVAL;
} }
tev->point.retprobe = true; tp->retprobe = true;
} }
pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, return 0;
tev->point.offset); }
/* Call probe_finder callback with real subprogram DIE */
static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
{
Dwarf_Die die_mem;
Dwarf_Attribute fb_attr;
size_t nops;
int ret;
/* If no real subprogram, find a real one */
if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
sp_die = die_find_real_subprogram(&pf->cu_die,
pf->addr, &die_mem);
if (!sp_die) {
pr_warning("Failed to find probe point in any "
"functions.\n");
return -ENOENT;
}
}
/* Get the frame base attribute/ops */ /* Get the frame base attribute/ops */
dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr);
...@@ -820,22 +1015,13 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) ...@@ -820,22 +1015,13 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
#endif #endif
} }
/* Find each argument */ /* Call finder's callback handler */
tev->nargs = pf->pev->nargs; ret = pf->callback(sp_die, pf);
tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
if (tev->args == NULL)
return -ENOMEM;
for (i = 0; i < pf->pev->nargs; i++) {
pf->pvar = &pf->pev->args[i];
pf->tvar = &tev->args[i];
ret = find_variable(sp_die, pf);
if (ret != 0)
return ret;
}
/* *pf->fb_ops will be cached in libdw. Don't free it. */ /* *pf->fb_ops will be cached in libdw. Don't free it. */
pf->fb_ops = NULL; pf->fb_ops = NULL;
return 0;
return ret;
} }
/* Find probe point from its line number */ /* Find probe point from its line number */
...@@ -871,7 +1057,7 @@ static int find_probe_point_by_line(struct probe_finder *pf) ...@@ -871,7 +1057,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
(int)i, lineno, (uintmax_t)addr); (int)i, lineno, (uintmax_t)addr);
pf->addr = addr; pf->addr = addr;
ret = convert_probe_point(NULL, pf); ret = call_probe_finder(NULL, pf);
/* Continuing, because target line might be inlined. */ /* Continuing, because target line might be inlined. */
} }
return ret; return ret;
...@@ -984,7 +1170,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) ...@@ -984,7 +1170,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
(int)i, lineno, (unsigned long long)addr); (int)i, lineno, (unsigned long long)addr);
pf->addr = addr; pf->addr = addr;
ret = convert_probe_point(sp_die, pf); ret = call_probe_finder(sp_die, pf);
/* Continuing, because target line might be inlined. */ /* Continuing, because target line might be inlined. */
} }
/* TODO: deallocate lines, but how? */ /* TODO: deallocate lines, but how? */
...@@ -1019,7 +1205,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) ...@@ -1019,7 +1205,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
pr_debug("found inline addr: 0x%jx\n", pr_debug("found inline addr: 0x%jx\n",
(uintmax_t)pf->addr); (uintmax_t)pf->addr);
param->retval = convert_probe_point(in_die, pf); param->retval = call_probe_finder(in_die, pf);
if (param->retval < 0) if (param->retval < 0)
return DWARF_CB_ABORT; return DWARF_CB_ABORT;
} }
...@@ -1057,7 +1243,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) ...@@ -1057,7 +1243,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
} }
pf->addr += pp->offset; pf->addr += pp->offset;
/* TODO: Check the address in this function */ /* TODO: Check the address in this function */
param->retval = convert_probe_point(sp_die, pf); param->retval = call_probe_finder(sp_die, pf);
} }
} else { } else {
struct dwarf_callback_param _param = {.data = (void *)pf, struct dwarf_callback_param _param = {.data = (void *)pf,
...@@ -1079,90 +1265,276 @@ static int find_probe_point_by_func(struct probe_finder *pf) ...@@ -1079,90 +1265,276 @@ static int find_probe_point_by_func(struct probe_finder *pf)
return _param.retval; return _param.retval;
} }
/* Find probe_trace_events specified by perf_probe_event from debuginfo */ /* Find probe points from debuginfo */
int find_probe_trace_events(int fd, struct perf_probe_event *pev, static int find_probes(int fd, struct probe_finder *pf)
struct probe_trace_event **tevs, int max_tevs)
{ {
struct probe_finder pf = {.pev = pev, .max_tevs = max_tevs}; struct perf_probe_point *pp = &pf->pev->point;
struct perf_probe_point *pp = &pev->point;
Dwarf_Off off, noff; Dwarf_Off off, noff;
size_t cuhl; size_t cuhl;
Dwarf_Die *diep; Dwarf_Die *diep;
Dwarf *dbg; Dwarf *dbg = NULL;
Dwfl *dwfl;
Dwarf_Addr bias; /* Currently ignored */
int ret = 0; int ret = 0;
pf.tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs); dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
if (pf.tevs == NULL)
return -ENOMEM;
*tevs = pf.tevs;
pf.ntevs = 0;
dbg = dwarf_begin(fd, DWARF_C_READ);
if (!dbg) { if (!dbg) {
pr_warning("No dwarf info found in the vmlinux - " pr_warning("No dwarf info found in the vmlinux - "
"please rebuild with CONFIG_DEBUG_INFO=y.\n"); "please rebuild with CONFIG_DEBUG_INFO=y.\n");
free(pf.tevs);
*tevs = NULL;
return -EBADF; return -EBADF;
} }
#if _ELFUTILS_PREREQ(0, 142) #if _ELFUTILS_PREREQ(0, 142)
/* Get the call frame information from this dwarf */ /* Get the call frame information from this dwarf */
pf.cfi = dwarf_getcfi(dbg); pf->cfi = dwarf_getcfi(dbg);
#endif #endif
off = 0; off = 0;
line_list__init(&pf.lcache); line_list__init(&pf->lcache);
/* Loop on CUs (Compilation Unit) */ /* Loop on CUs (Compilation Unit) */
while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) && while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) &&
ret >= 0) { ret >= 0) {
/* Get the DIE(Debugging Information Entry) of this CU */ /* Get the DIE(Debugging Information Entry) of this CU */
diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die); diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
if (!diep) if (!diep)
continue; continue;
/* Check if target file is included. */ /* Check if target file is included. */
if (pp->file) if (pp->file)
pf.fname = cu_find_realpath(&pf.cu_die, pp->file); pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
else else
pf.fname = NULL; pf->fname = NULL;
if (!pp->file || pf.fname) { if (!pp->file || pf->fname) {
if (pp->function) if (pp->function)
ret = find_probe_point_by_func(&pf); ret = find_probe_point_by_func(pf);
else if (pp->lazy_line) else if (pp->lazy_line)
ret = find_probe_point_lazy(NULL, &pf); ret = find_probe_point_lazy(NULL, pf);
else { else {
pf.lno = pp->line; pf->lno = pp->line;
ret = find_probe_point_by_line(&pf); ret = find_probe_point_by_line(pf);
} }
} }
off = noff; off = noff;
} }
line_list__free(&pf.lcache); line_list__free(&pf->lcache);
dwarf_end(dbg); if (dwfl)
dwfl_end(dwfl);
return (ret < 0) ? ret : pf.ntevs; return ret;
}
/*
 * Add a found probe point into trace event list.
 * probe_finder callback: pf is embedded in a trace_event_finder, which
 * carries the result array. Returns 0 on success or a negative errno;
 * note the slot is claimed (ntevs++) before conversion, so on error the
 * caller owns a partially filled entry.
 */
static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
{
	struct trace_event_finder *tf =
			container_of(pf, struct trace_event_finder, pf);
	struct probe_trace_event *tev;
	int ret, i;

	/* Check number of tevs */
	if (tf->ntevs == tf->max_tevs) {
		pr_warning("Too many( > %d) probe point found.\n",
			   tf->max_tevs);
		return -ERANGE;
	}
	tev = &tf->tevs[tf->ntevs++];

	/* Fill in symbol+offset (and retprobe flag) for this address */
	ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
				     &tev->point);
	if (ret < 0)
		return ret;

	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
		 tev->point.offset);

	/* Find each argument */
	tev->nargs = pf->pev->nargs;
	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
	if (tev->args == NULL)
		return -ENOMEM;
	for (i = 0; i < pf->pev->nargs; i++) {
		/* find_variable() reads pvar and writes the converted tvar */
		pf->pvar = &pf->pev->args[i];
		pf->tvar = &tev->args[i];
		ret = find_variable(sp_die, pf);
		if (ret != 0)
			return ret;
	}

	return 0;
}
/*
 * Find probe_trace_events specified by perf_probe_event from debuginfo.
 * On success *tevs points to a zalloc'ed array (owned by the caller) and
 * the number of filled entries is returned; on failure the array is
 * freed, *tevs is NULL and a negative errno is returned.
 */
int find_probe_trace_events(int fd, struct perf_probe_event *pev,
			    struct probe_trace_event **tevs, int max_tevs)
{
	struct trace_event_finder tf = {
			.pf = {.pev = pev, .callback = add_probe_trace_event},
			.max_tevs = max_tevs};
	int ret;

	/* Allocate result tevs array */
	*tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
	if (*tevs == NULL)
		return -ENOMEM;

	tf.tevs = *tevs;
	tf.ntevs = 0;

	ret = find_probes(fd, &tf.pf);
	if (ret < 0) {
		/* Don't hand back a half-built array on error */
		free(*tevs);
		*tevs = NULL;
		return ret;
	}

	/* ret >= 0 is guaranteed here, so just report the event count */
	return tf.ntevs;
}
#define MAX_VAR_LEN 64
/*
 * Collect available variables in this scope.
 * die_find_child callback: appends each convertible parameter/variable
 * name to the current probe point's strlist (the last entry in af->vls).
 */
static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
{
	struct available_var_finder *af = data;
	struct variable_list *vl;
	char buf[MAX_VAR_LEN];
	int tag, ret;

	/* Work on the most recently added probe point's list */
	vl = &af->vls[af->nvls - 1];

	tag = dwarf_tag(die_mem);
	if (tag == DW_TAG_formal_parameter ||
	    tag == DW_TAG_variable) {
		/* tvar == NULL: only check that the location is convertible */
		ret = convert_variable_location(die_mem, af->pf.addr,
						af->pf.fb_ops, NULL);
		if (ret == 0) {
			ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
			pr_debug2("Add new var: %s\n", buf);
			if (ret > 0)
				strlist__add(vl->vars, buf);
		}
	}

	/* Descend only into children covering the probe address */
	if (af->child && dwarf_haspc(die_mem, af->pf.addr))
		return DIE_FIND_CB_CONTINUE;
	else
		return DIE_FIND_CB_SIBLING;
}
/*
 * Add a found vars into available variables list.
 * probe_finder callback: pf is embedded in an available_var_finder.
 * Records the probe point, then collects local (and optionally extern)
 * variables visible there into a fresh strlist; vl->vars is left NULL
 * when nothing was found.
 */
static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
{
	struct available_var_finder *af =
			container_of(pf, struct available_var_finder, pf);
	struct variable_list *vl;
	Dwarf_Die die_mem, *scopes = NULL;
	int ret, nscopes;

	/* Check number of tevs */
	if (af->nvls == af->max_vls) {
		pr_warning("Too many( > %d) probe point found.\n", af->max_vls);
		return -ERANGE;
	}
	vl = &af->vls[af->nvls++];

	ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
				     &vl->point);
	if (ret < 0)
		return ret;

	pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
		 vl->point.offset);

	/* Find local variables */
	vl->vars = strlist__new(true, NULL);
	if (vl->vars == NULL)
		return -ENOMEM;
	af->child = true;
	die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem);

	/* Find external variables */
	if (!af->externs)
		goto out;
	/* Don't need to search child DIE for externs. */
	af->child = false;
	nscopes = dwarf_getscopes_die(sp_die, &scopes);
	while (nscopes-- > 1)
		die_find_child(&scopes[nscopes], collect_variables_cb,
			       (void *)af, &die_mem);
	free(scopes);	/* free(NULL) is a no-op, no guard needed */

out:
	if (strlist__empty(vl->vars)) {
		strlist__delete(vl->vars);
		vl->vars = NULL;
	}

	return ret;
}
/*
 * Find available variables at given probe point.
 * On success *vls points to a zalloc'ed array (owned by the caller) and
 * the number of filled entries is returned; on failure everything is
 * freed, *vls is NULL and a negative errno is returned.
 */
int find_available_vars_at(int fd, struct perf_probe_event *pev,
			   struct variable_list **vls, int max_vls,
			   bool externs)
{
	struct available_var_finder af = {
			.pf = {.pev = pev, .callback = add_available_vars},
			.max_vls = max_vls, .externs = externs};
	int ret;

	/* Allocate result vls array */
	*vls = zalloc(sizeof(struct variable_list) * max_vls);
	if (*vls == NULL)
		return -ENOMEM;

	af.vls = *vls;
	af.nvls = 0;

	ret = find_probes(fd, &af.pf);
	if (ret < 0) {
		/* Free vlist for error */
		while (af.nvls--) {
			free(af.vls[af.nvls].point.symbol);	/* free(NULL) is safe */
			if (af.vls[af.nvls].vars)
				strlist__delete(af.vls[af.nvls].vars);
		}
		free(af.vls);
		*vls = NULL;
		return ret;
	}

	/* ret >= 0 is guaranteed here; report the number of probe points */
	return af.nvls;
}
/* Reverse search */ /* Reverse search */
int find_perf_probe_point(int fd, unsigned long addr, int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
struct perf_probe_point *ppt)
{ {
Dwarf_Die cudie, spdie, indie; Dwarf_Die cudie, spdie, indie;
Dwarf *dbg; Dwarf *dbg = NULL;
Dwfl *dwfl = NULL;
Dwarf_Line *line; Dwarf_Line *line;
Dwarf_Addr laddr, eaddr; Dwarf_Addr laddr, eaddr, bias = 0;
const char *tmp; const char *tmp;
int lineno, ret = 0; int lineno, ret = 0;
bool found = false; bool found = false;
dbg = dwarf_begin(fd, DWARF_C_READ); /* Open the live linux kernel */
if (!dbg) dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
return -EBADF; if (!dbg) {
pr_warning("No dwarf info found in the vmlinux - "
"please rebuild with CONFIG_DEBUG_INFO=y.\n");
ret = -EINVAL;
goto end;
}
/* Adjust address with bias */
addr += bias;
/* Find cu die */ /* Find cu die */
if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr, &cudie)) { if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
pr_warning("No CU DIE is found at %lx\n", addr);
ret = -EINVAL; ret = -EINVAL;
goto end; goto end;
} }
...@@ -1225,7 +1597,8 @@ int find_perf_probe_point(int fd, unsigned long addr, ...@@ -1225,7 +1597,8 @@ int find_perf_probe_point(int fd, unsigned long addr,
} }
end: end:
dwarf_end(dbg); if (dwfl)
dwfl_end(dwfl);
if (ret >= 0) if (ret >= 0)
ret = found ? 1 : 0; ret = found ? 1 : 0;
return ret; return ret;
...@@ -1358,6 +1731,9 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) ...@@ -1358,6 +1731,9 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
struct line_finder *lf = param->data; struct line_finder *lf = param->data;
struct line_range *lr = lf->lr; struct line_range *lr = lf->lr;
pr_debug("find (%llx) %s\n",
(unsigned long long)dwarf_dieoffset(sp_die),
dwarf_diename(sp_die));
if (dwarf_tag(sp_die) == DW_TAG_subprogram && if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
die_compare_name(sp_die, lr->function)) { die_compare_name(sp_die, lr->function)) {
lf->fname = dwarf_decl_file(sp_die); lf->fname = dwarf_decl_file(sp_die);
...@@ -1401,10 +1777,12 @@ int find_line_range(int fd, struct line_range *lr) ...@@ -1401,10 +1777,12 @@ int find_line_range(int fd, struct line_range *lr)
Dwarf_Off off = 0, noff; Dwarf_Off off = 0, noff;
size_t cuhl; size_t cuhl;
Dwarf_Die *diep; Dwarf_Die *diep;
Dwarf *dbg; Dwarf *dbg = NULL;
Dwfl *dwfl;
Dwarf_Addr bias; /* Currently ignored */
const char *comp_dir; const char *comp_dir;
dbg = dwarf_begin(fd, DWARF_C_READ); dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
if (!dbg) { if (!dbg) {
pr_warning("No dwarf info found in the vmlinux - " pr_warning("No dwarf info found in the vmlinux - "
"please rebuild with CONFIG_DEBUG_INFO=y.\n"); "please rebuild with CONFIG_DEBUG_INFO=y.\n");
...@@ -1450,8 +1828,7 @@ int find_line_range(int fd, struct line_range *lr) ...@@ -1450,8 +1828,7 @@ int find_line_range(int fd, struct line_range *lr)
} }
pr_debug("path: %s\n", lr->path); pr_debug("path: %s\n", lr->path);
dwarf_end(dbg); dwfl_end(dwfl);
return (ret < 0) ? ret : lf.found; return (ret < 0) ? ret : lf.found;
} }
...@@ -22,20 +22,27 @@ extern int find_probe_trace_events(int fd, struct perf_probe_event *pev, ...@@ -22,20 +22,27 @@ extern int find_probe_trace_events(int fd, struct perf_probe_event *pev,
int max_tevs); int max_tevs);
/* Find a perf_probe_point from debuginfo */ /* Find a perf_probe_point from debuginfo */
extern int find_perf_probe_point(int fd, unsigned long addr, extern int find_perf_probe_point(unsigned long addr,
struct perf_probe_point *ppt); struct perf_probe_point *ppt);
/* Find a line range */
extern int find_line_range(int fd, struct line_range *lr); extern int find_line_range(int fd, struct line_range *lr);
/* Find available variables */
extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
struct variable_list **vls, int max_points,
bool externs);
#include <dwarf.h> #include <dwarf.h>
#include <libdw.h> #include <libdw.h>
#include <libdwfl.h>
#include <version.h> #include <version.h>
struct probe_finder { struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */ struct perf_probe_event *pev; /* Target probe event */
struct probe_trace_event *tevs; /* Result trace events */
int ntevs; /* Number of trace events */ /* Callback when a probe point is found */
int max_tevs; /* Max number of trace events */ int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf);
/* For function searching */ /* For function searching */
int lno; /* Line number */ int lno; /* Line number */
...@@ -53,6 +60,22 @@ struct probe_finder { ...@@ -53,6 +60,22 @@ struct probe_finder {
struct probe_trace_arg *tvar; /* Current result variable */ struct probe_trace_arg *tvar; /* Current result variable */
}; };
struct trace_event_finder {
struct probe_finder pf;
struct probe_trace_event *tevs; /* Found trace events */
int ntevs; /* Number of trace events */
int max_tevs; /* Max number of trace events */
};
struct available_var_finder {
struct probe_finder pf;
struct variable_list *vls; /* Found variable lists */
int nvls; /* Number of variable lists */
int max_vls; /* Max no. of variable lists */
bool externs; /* Find external vars too */
bool child; /* Search child scopes */
};
struct line_finder { struct line_finder {
struct line_range *lr; /* Target line range */ struct line_range *lr; /* Target line range */
......
#include <slang.h>
#include "libslang.h" #include "libslang.h"
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/list.h> #include <linux/list.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment