Commit 7b732a75 authored by Peter Zijlstra, committed by Ingo Molnar

perf_counter: new output ABI - part 1

Impact: Rework the perfcounter output ABI

Use sys_read() only for instant data and provide mmap() output for all
async overflow data.

The first mmap() determines the size of the output buffer. The mmap()
size must be (1 + pages) * PAGE_SIZE, where pages must be 0 or a power
of 2. Further mmap()s of the same fd must have the same size. Once all
maps are gone, you can mmap() again with a new size.
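
For illustration, a minimal userspace sketch of mapping such a buffer,
assuming fd was obtained from the perf counter syscall (names and error
handling are ours, not part of this commit):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Map the metadata page plus 'pages' data pages; per the ABI
     * above, 'pages' must be 0 or a power of 2. */
    static void *map_counter(int fd, unsigned int pages)
    {
            size_t len = (1 + pages) * sysconf(_SC_PAGESIZE);
            void *buf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);

            if (buf == MAP_FAILED) {
                    perror("mmap");
                    exit(EXIT_FAILURE);
            }
            return buf; /* page 0: metadata; pages 1..n: circular data */
    }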

In the case of 0 extra pages there is no data output and the first page
contains only metadata.

When there are data pages, a poll() event will be generated for each
full page of data. Furthermore, the output is circular. This means
that although 1 page is a valid configuration, it's useless, since
we'll start overwriting it the instant we report a full page.
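
A sketch of the consuming side under these rules; data_head comes from
the metadata page (see the header diff below), and consume() is a
hypothetical callback:

    #include <poll.h>

    /* Wait until the kernel signals a full page, then eat everything
     * between our tail and the kernel's data_head. The buffer is
     * circular, hence the modulo; unread data can be overwritten if
     * the kernel laps us. */
    static __u32 drain(int fd, volatile struct perf_counter_mmap_page *meta,
                       const unsigned char *base, size_t size, __u32 tail)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN };
            __u32 head;

            poll(&pfd, 1, -1);
            head = meta->data_head;
            while (tail != head)
                    consume(base[tail++ % size]);   /* hypothetical */
            return tail;
    }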

Future work will focus on the output format (currently maintained),
where we'll likely want each entry denoted by a header that includes a
type and length.
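
Such a header might look like the sketch below; this struct is purely
illustrative and does not exist in this commit:

    struct perf_data_header {       /* hypothetical */
            __u32   type;           /* what kind of record follows */
            __u32   len;            /* total length, header included */
    };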

Further future work will allow splice() of the fd, which would also
carry the async overflow data -- splice() would be mutually exclusive
with mmap() of the data.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.470536358@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b09d2501
@@ -417,7 +417,6 @@ void hw_perf_restore(u64 disable)
 		atomic64_set(&counter->hw.prev_count, val);
 		counter->hw.idx = hwc_index[i] + 1;
 		write_pmc(counter->hw.idx, val);
-		if (counter->user_page)
 		perf_counter_update_userpage(counter);
 	}
 	mb();
@@ -574,7 +573,6 @@ static void power_perf_disable(struct perf_counter *counter)
 		ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
 		write_pmc(counter->hw.idx, 0);
 		counter->hw.idx = 0;
-		if (counter->user_page)
 		perf_counter_update_userpage(counter);
 		break;
 	}
@@ -702,7 +700,6 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	write_pmc(counter->hw.idx, val);
 	atomic64_set(&counter->hw.prev_count, val);
 	atomic64_set(&counter->hw.period_left, left);
-	if (counter->user_page)
 	perf_counter_update_userpage(counter);

 	/*
...
@@ -152,6 +152,8 @@ struct perf_counter_mmap_page {
 	__u32	lock;		/* seqlock for synchronization */
 	__u32	index;		/* hardware counter identifier */
 	__s64	offset;		/* add to hardware counter value */
+
+	__u32	data_head;	/* head in the data section */
 };

 #ifdef __KERNEL__
@@ -218,21 +220,6 @@ struct hw_perf_counter {
 #endif
 };

-/*
- * Hardcoded buffer length limit for now, for IRQ-fed events:
- */
-#define PERF_DATA_BUFLEN	2048
-
-/**
- * struct perf_data - performance counter IRQ data sampling ...
- */
-struct perf_data {
-	int			len;
-	int			rd_idx;
-	int			overrun;
-	u8			data[PERF_DATA_BUFLEN];
-};
-
 struct perf_counter;

 /**
@@ -256,6 +243,14 @@ enum perf_counter_active_state {
 struct file;

+struct perf_mmap_data {
+	struct rcu_head			rcu_head;
+	int				nr_pages;
+	atomic_t			head;
+	struct perf_counter_mmap_page	*user_page;
+	void				*data_pages[0];
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
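
Because nr_pages is 0 or a power of 2, the kernel side can locate any
byte of the circular data area with simple mask arithmetic. A
hypothetical helper (not part of this commit) illustrating the layout:

    static void *perf_data_ptr(struct perf_mmap_data *data,
                               unsigned long offset)
    {
            unsigned long pgoff;

            /* nr_pages is a power of 2, so masking replaces modulo */
            pgoff = (offset >> PAGE_SHIFT) & (data->nr_pages - 1);

            return (char *)data->data_pages[pgoff] +
                   (offset & (PAGE_SIZE - 1));
    }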
@@ -289,16 +284,15 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;

-	/* pointer to page shared with userspace via mmap */
-	unsigned long			user_page;
+	/* mmap bits */
+	struct mutex			mmap_mutex;
+	atomic_t			mmap_count;
+	struct perf_mmap_data		*data;

-	/* read() / irq related data */
+	/* poll related */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
 	int				wakeup_pending;
-	struct perf_data		*irqdata;
-	struct perf_data		*usrdata;
-	struct perf_data		data[2];

 	void				(*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
...
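
The pre-existing lock/index/offset fields in the metadata page suggest
how userspace performs a tear-free self-monitoring read: retry while
the seqlock changes underneath. A hedged sketch, where hw_read() stands
in for an architecture-specific counter read (e.g. rdpmc) and barrier()
is a compiler barrier; both are our assumptions:

    #define barrier()       asm volatile("" ::: "memory")

    static __u64 read_self(volatile struct perf_counter_mmap_page *pc)
    {
            __u32 seq;
            __u64 value;

            do {
                    seq = pc->lock;
                    barrier();
                    /* index 0 means the counter is not currently on hw */
                    value = pc->index ? hw_read(pc->index - 1) : 0;
                    value += pc->offset;
                    barrier();
            } while (pc->lock != seq);  /* kernel updated it; retry */

            return value;
    }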
[The remaining diff is collapsed.]