Commit 44a68397 authored by Ingo Molnar

Merge branch 'perf/fast' into perf/core

Merge reason: Let's ready it for v3.4
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents 801493c2 08aa0d1f
......@@ -12,10 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
/* ARM performance counters start from 1 (in the cp15 accesses) so use the
* same indexes here for consistency. */
#define PERF_EVENT_INDEX_OFFSET 1
/* ARM perf PMU IDs for use by internal perf clients. */
enum arm_perf_pmu_ids {
ARM_PERF_PMU_ID_XSCALE1 = 0,
......
......@@ -12,6 +12,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
#define PERF_EVENT_INDEX_OFFSET 0
#endif /* _ASM_PERF_EVENT_H */
......@@ -19,6 +19,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
#define PERF_EVENT_INDEX_OFFSET 0
#endif /* _ASM_PERF_EVENT_H */
......@@ -61,8 +61,6 @@ struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
#define PERF_EVENT_INDEX_OFFSET 1
/*
* Only override the default definitions in include/linux/perf_event.h
* if we have hardware PMU support.
......
......@@ -1187,6 +1187,11 @@ static int power_pmu_event_init(struct perf_event *event)
return err;
}
/*
 * pmu::event_idx callback for the POWER PMU: report the event's hardware
 * counter index (hw.idx) unchanged.
 */
static int power_pmu_event_idx(struct perf_event *event)
{
	int idx = event->hw.idx;

	return idx;
}
struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
......@@ -1199,6 +1204,7 @@ struct pmu power_pmu = {
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
.event_idx = power_pmu_event_idx,
};
/*
......
......@@ -6,4 +6,3 @@
/* Empty, just to avoid compiling error */
#define PERF_EVENT_INDEX_OFFSET 0
......@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
#define PERF_EVENT_INDEX_OFFSET 0
/*
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
* This flag is otherwise unused and ABI specified to be 0, so nobody should
......
......@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/device.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
......@@ -31,6 +32,7 @@
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>
#include <asm/timer.h>
#include "perf_event.h"
......@@ -1210,6 +1212,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
if (x86_pmu.attr_rdpmc)
set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
......@@ -1319,6 +1323,8 @@ static int __init init_hw_perf_events(void)
}
}
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
......@@ -1542,10 +1548,71 @@ static int x86_pmu_event_init(struct perf_event *event)
return err;
}
/*
 * pmu::event_idx callback for the x86 PMU.
 *
 * Generic counters are reported as hw.idx + 1.  When the PMU has fixed
 * counters and the event sits on one (hw.idx >= X86_PMC_IDX_FIXED), the
 * index is rebased to start at zero and tagged with bit 30 before the
 * same +1 offset is applied.
 */
static int x86_pmu_event_idx(struct perf_event *event)
{
	int idx = event->hw.idx;

	/* Generic counter, or a PMU without fixed counters. */
	if (!x86_pmu.num_counters_fixed || idx < X86_PMC_IDX_FIXED)
		return idx + 1;

	/* Fixed counter: rebase and set the bit-30 tag. */
	return ((idx - X86_PMC_IDX_FIXED) | (1 << 30)) + 1;
}
/*
 * sysfs "show" handler for the rdpmc attribute: prints the current value
 * of x86_pmu.attr_rdpmc followed by a newline and returns what snprintf()
 * reports.
 */
static ssize_t get_attr_rdpmc(struct device *cdev,
			      struct device_attribute *attr,
			      char *buf)
{
	const int rdpmc_enabled = x86_pmu.attr_rdpmc;

	return snprintf(buf, 40, "%d\n", rdpmc_enabled);
}
/*
 * Set or clear X86_CR4_PCE on the CPU this runs on.
 *
 * @info carries the requested state encoded as an integer in the pointer
 * value: non-zero sets the bit, zero clears it.
 */
static void change_rdpmc(void *info)
{
	if ((unsigned long)info != 0) {
		set_in_cr4(X86_CR4_PCE);
		return;
	}

	clear_in_cr4(X86_CR4_PCE);
}
/*
 * sysfs "store" handler for the rdpmc attribute.
 *
 * Parses @buf as an unsigned integer (base auto-detected); any non-zero
 * value means "enabled".  When the requested state differs from
 * x86_pmu.attr_rdpmc, the new state is recorded and change_rdpmc() is run
 * on every online CPU.
 *
 * Returns @count on success, or the negative error code from kstrtoul()
 * when @buf is not a valid number.
 */
static ssize_t set_attr_rdpmc(struct device *cdev,
			      struct device_attribute *attr,
			      const char *buf, size_t count)
{
	unsigned long val;
	int ret;

	/* Reject malformed input instead of silently treating it as 0. */
	ret = kstrtoul(buf, 0, &val);
	if (ret)
		return ret;

	if (!!val != !!x86_pmu.attr_rdpmc) {
		x86_pmu.attr_rdpmc = !!val;
		/*
		 * on_each_cpu() includes the calling CPU; smp_call_function()
		 * would only reach the other CPUs and leave this CPU's CR4
		 * out of sync with attr_rdpmc.
		 */
		on_each_cpu(change_rdpmc, (void *)val, 1);
	}

	return count;
}
/*
 * sysfs attribute "rdpmc": readable and writable by the owner only,
 * served by get_attr_rdpmc() and set_attr_rdpmc().
 */
static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
/* NULL-terminated list of the attributes that make up the x86 PMU group. */
static struct attribute *x86_pmu_attrs[] = {
&dev_attr_rdpmc.attr,
NULL,
};
/* Attribute group wrapping x86_pmu_attrs. */
static struct attribute_group x86_pmu_attr_group = {
.attrs = x86_pmu_attrs,
};
/* NULL-terminated list of attribute groups, referenced from pmu.attr_groups. */
static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
NULL,
};
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
.attr_groups = x86_pmu_attr_groups,
.event_init = x86_pmu_event_init,
.add = x86_pmu_add,
......@@ -1557,8 +1624,23 @@ static struct pmu pmu = {
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
.event_idx = x86_pmu_event_idx,
};
/*
 * Fill in the time conversion fields of the user-visible mmap page.
 *
 * Does nothing unless the boot CPU advertises both
 * X86_FEATURE_CONSTANT_TSC and X86_FEATURE_NONSTOP_TSC.  Otherwise the
 * per-CPU cyc2ns multiplier, the CYC2NS_SCALE_FACTOR shift and the
 * per-CPU cyc2ns offset (minus @now) are published in @userpg.
 */
void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
{
	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		return;

	userpg->time_mult = this_cpu_read(cyc2ns);
	userpg->time_shift = CYC2NS_SCALE_FACTOR;
	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
}
/*
* callchain support
*/
......
......@@ -307,6 +307,14 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
/*
* sysfs attrs
*/
int attr_rdpmc;
/*
* CPU Hotplug hooks
*/
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
......
......@@ -291,12 +291,14 @@ struct perf_event_mmap_page {
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
__u64 time_running; /* time event on cpu */
__u32 time_mult, time_shift;
__u64 time_offset;
/*
* Hole for extension of the self monitor capabilities
*/
__u64 __reserved[123]; /* align to 1k */
__u64 __reserved[121]; /* align to 1k */
/*
* Control data for the mmap() data buffer.
......@@ -615,6 +617,7 @@ struct pmu {
struct list_head entry;
struct device *dev;
const struct attribute_group **attr_groups;
char *name;
int type;
......@@ -680,6 +683,12 @@ struct pmu {
* for each successful ->add() during the transaction.
*/
void (*cancel_txn) (struct pmu *pmu); /* optional */
/*
* Will return the value for perf_event_mmap_page::index for this event,
* if no implementation is provided it will default to: event->hw.idx + 1.
*/
int (*event_idx) (struct perf_event *event); /*optional */
};
/**
......
......@@ -3208,10 +3208,6 @@ int perf_event_task_disable(void)
return 0;
}
#ifndef PERF_EVENT_INDEX_OFFSET
# define PERF_EVENT_INDEX_OFFSET 0
#endif
static int perf_event_index(struct perf_event *event)
{
if (event->hw.state & PERF_HES_STOPPED)
......@@ -3220,21 +3216,26 @@ static int perf_event_index(struct perf_event *event)
if (event->state != PERF_EVENT_STATE_ACTIVE)
return 0;
return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
return event->pmu->event_idx(event);
}
static void calc_timer_values(struct perf_event *event,
u64 *now,
u64 *enabled,
u64 *running)
{
u64 now, ctx_time;
u64 ctx_time;
now = perf_clock();
ctx_time = event->shadow_ctx_time + now;
*now = perf_clock();
ctx_time = event->shadow_ctx_time + *now;
*enabled = ctx_time - event->tstamp_enabled;
*running = ctx_time - event->tstamp_running;
}
/*
 * Default implementation: intentionally empty, so the time conversion
 * fields of @userpg are left untouched.  Declared __weak so that another
 * definition of the same symbol can take its place at link time.
 */
void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
{
}
/*
* Callers need to ensure there can be no nesting of this function, otherwise
* the seqlock logic goes bad. We can not serialize this because the arch
......@@ -3244,7 +3245,7 @@ void perf_event_update_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
struct ring_buffer *rb;
u64 enabled, running;
u64 enabled, running, now;
rcu_read_lock();
/*
......@@ -3256,7 +3257,7 @@ void perf_event_update_userpage(struct perf_event *event)
* because of locking issue as we can be called in
* NMI context
*/
calc_timer_values(event, &enabled, &running);
calc_timer_values(event, &now, &enabled, &running);
rb = rcu_dereference(event->rb);
if (!rb)
goto unlock;
......@@ -3272,7 +3273,7 @@ void perf_event_update_userpage(struct perf_event *event)
barrier();
userpg->index = perf_event_index(event);
userpg->offset = perf_event_count(event);
if (event->state == PERF_EVENT_STATE_ACTIVE)
if (userpg->index)
userpg->offset -= local64_read(&event->hw.prev_count);
userpg->time_enabled = enabled +
......@@ -3281,6 +3282,8 @@ void perf_event_update_userpage(struct perf_event *event)
userpg->time_running = running +
atomic64_read(&event->child_total_time_running);
perf_update_user_clock(userpg, now);
barrier();
++userpg->lock;
preempt_enable();
......@@ -3538,6 +3541,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
event->mmap_user = get_current_user();
vma->vm_mm->pinned_vm += event->mmap_locked;
perf_event_update_userpage(event);
unlock:
if (!ret)
atomic_inc(&event->mmap_count);
......@@ -3769,7 +3774,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
u64 enabled = 0, running = 0;
u64 enabled = 0, running = 0, now;
u64 read_format = event->attr.read_format;
/*
......@@ -3782,7 +3787,7 @@ static void perf_output_read(struct perf_output_handle *handle,
* NMI context
*/
if (read_format & PERF_FORMAT_TOTAL_TIMES)
calc_timer_values(event, &enabled, &running);
calc_timer_values(event, &now, &enabled, &running);
if (event->attr.read_format & PERF_FORMAT_GROUP)
perf_output_read_group(handle, event, enabled, running);
......@@ -4994,6 +4999,11 @@ static int perf_swevent_init(struct perf_event *event)
return 0;
}
/*
 * pmu::event_idx callback used by the software, tracepoint, cpu-clock and
 * task-clock PMUs: always reports index 0.
 */
static int perf_swevent_event_idx(struct perf_event *event)
{
return 0;
}
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
......@@ -5003,6 +5013,8 @@ static struct pmu perf_swevent = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.event_idx = perf_swevent_event_idx,
};
#ifdef CONFIG_EVENT_TRACING
......@@ -5089,6 +5101,8 @@ static struct pmu perf_tracepoint = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.event_idx = perf_swevent_event_idx,
};
static inline void perf_tp_register(void)
......@@ -5308,6 +5322,8 @@ static struct pmu perf_cpu_clock = {
.start = cpu_clock_event_start,
.stop = cpu_clock_event_stop,
.read = cpu_clock_event_read,
.event_idx = perf_swevent_event_idx,
};
/*
......@@ -5380,6 +5396,8 @@ static struct pmu perf_task_clock = {
.start = task_clock_event_start,
.stop = task_clock_event_stop,
.read = task_clock_event_read,
.event_idx = perf_swevent_event_idx,
};
static void perf_pmu_nop_void(struct pmu *pmu)
......@@ -5407,6 +5425,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
perf_pmu_enable(pmu);
}
/*
 * Fallback pmu::event_idx callback, installed by perf_pmu_register() for
 * PMUs that do not provide their own: reports hw.idx + 1.
 */
static int perf_event_idx_default(struct perf_event *event)
{
	int idx = event->hw.idx;

	return idx + 1;
}
/*
* Ensures all contexts with the same task_ctx_nr have the same
* pmu_cpu_context too.
......@@ -5493,6 +5516,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
if (!pmu->dev)
goto out;
pmu->dev->groups = pmu->attr_groups;
device_initialize(pmu->dev);
ret = dev_set_name(pmu->dev, "%s", pmu->name);
if (ret)
......@@ -5596,6 +5620,9 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type)
pmu->pmu_disable = perf_pmu_nop_void;
}
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;
list_add_rcu(&pmu->entry, &pmus);
ret = 0;
unlock:
......
......@@ -613,6 +613,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
bp->hw.state = PERF_HES_STOPPED;
}
/*
 * pmu::event_idx callback for the breakpoint PMU: always reports index 0.
 */
static int hw_breakpoint_event_idx(struct perf_event *bp)
{
return 0;
}
static struct pmu perf_breakpoint = {
.task_ctx_nr = perf_sw_context, /* could eventually get its own */
......@@ -622,6 +627,8 @@ static struct pmu perf_breakpoint = {
.start = hw_breakpoint_start,
.stop = hw_breakpoint_stop,
.read = hw_breakpoint_pmu_read,
.event_idx = hw_breakpoint_event_idx,
};
int __init init_hw_breakpoint(void)
......
......@@ -15,6 +15,8 @@
#include "util/thread_map.h"
#include "../../include/linux/hw_breakpoint.h"
#include <sys/mman.h>
static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
{
bool *visited = symbol__priv(sym);
......@@ -1296,6 +1298,173 @@ static int test__PERF_RECORD(void)
return (err < 0 || errs > 0) ? -1 : 0;
}
#if defined(__x86_64__) || defined(__i386__)
/*
 * Compiler-level barrier: an empty asm statement with a "memory" clobber,
 * which stops the compiler from moving memory accesses across it.  It
 * emits no instruction.
 */
#define barrier() asm volatile("" ::: "memory")
static u64 rdpmc(unsigned int counter)
{
unsigned int low, high;
asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
return low | ((u64)high) << 32;
}
static u64 rdtsc(void)
{
unsigned int low, high;
asm volatile("rdtsc" : "=a" (low), "=d" (high));
return low | ((u64)high) << 32;
}
static u64 mmap_read_self(void *addr)
{
struct perf_event_mmap_page *pc = addr;
u32 seq, idx, time_mult = 0, time_shift = 0;
u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
do {
seq = pc->lock;
barrier();
enabled = pc->time_enabled;
running = pc->time_running;
if (enabled != running) {
cyc = rdtsc();
time_mult = pc->time_mult;
time_shift = pc->time_shift;
time_offset = pc->time_offset;
}
idx = pc->index;
count = pc->offset;
if (idx)
count += rdpmc(idx - 1);
barrier();
} while (pc->lock != seq);
if (enabled != running) {
u64 quot, rem;
quot = (cyc >> time_shift);
rem = cyc & ((1 << time_shift) - 1);
delta = time_offset + quot * time_mult +
((rem * time_mult) >> time_shift);
enabled += delta;
if (idx)
running += delta;
quot = count / running;
rem = count % running;
count = quot * enabled + (rem * enabled) / running;
}
return count;
}
/*
* If the RDPMC instruction faults then signal this back to the test parent task:
*/
static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
{
/* Exit with a non-zero status; test__rdpmc() treats any non-zero child status as failure. */
exit(-1);
}
static int __test__rdpmc(void)
{
long page_size = sysconf(_SC_PAGE_SIZE);
volatile int tmp = 0;
u64 i, loops = 1000;
int n;
int fd;
void *addr;
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_INSTRUCTIONS,
.exclude_kernel = 1,
};
u64 delta_sum = 0;
struct sigaction sa;
sigfillset(&sa.sa_mask);
sa.sa_sigaction = segfault_handler;
sigaction(SIGSEGV, &sa, NULL);
fprintf(stderr, "\n\n");
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
if (fd < 0) {
die("Error: sys_perf_event_open() syscall returned "
"with %d (%s)\n", fd, strerror(errno));
}
addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
if (addr == (void *)(-1)) {
die("Error: mmap() syscall returned "
"with (%s)\n", strerror(errno));
}
for (n = 0; n < 6; n++) {
u64 stamp, now, delta;
stamp = mmap_read_self(addr);
for (i = 0; i < loops; i++)
tmp++;
now = mmap_read_self(addr);
loops *= 10;
delta = now - stamp;
fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
delta_sum += delta;
}
munmap(addr, page_size);
close(fd);
fprintf(stderr, " ");
if (!delta_sum)
return -1;
return 0;
}
/*
 * Run __test__rdpmc() in a forked child so that a fault in the child
 * cannot take down the test runner.
 *
 * Returns 0 when the child was reaped with a zero wait status, and -1 if
 * fork() or waitpid() failed or the status was non-zero.
 */
static int test__rdpmc(void)
{
	int child_status = 0;
	int child = fork();

	if (child < 0)
		return -1;

	/* Child: run the test and report its result as the exit status. */
	if (child == 0)
		exit(__test__rdpmc());

	if (waitpid(child, &child_status, 0) < 0 || child_status != 0)
		return -1;

	return 0;
}
#endif
static struct test {
const char *desc;
int (*func)(void);
......@@ -1320,6 +1489,12 @@ static struct test {
.desc = "parse events tests",
.func = test__parse_events,
},
#if defined(__x86_64__) || defined(__i386__)
{
.desc = "x86 rdpmc test",
.func = test__rdpmc,
},
#endif
{
.desc = "Validate PERF_RECORD_* events & perf_sample fields",
.func = test__PERF_RECORD,
......@@ -1412,7 +1587,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
if (symbol__init() < 0)
return -1;
setup_pager();
return __cmd_test(argc, argv);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment