Commit 57c0c15b authored by Ingo Molnar's avatar Ingo Molnar

perf: Tidy up after the big rename

 - provide compatibility Kconfig entry for existing PERF_COUNTERS .config's

 - provide courtesy copy of old perf_counter.h, for user-space projects

 - small indentation fixups

 - fix up MAINTAINERS

 - fix small x86 printout fallout

 - fix up small PowerPC comment fallout (use 'counter' as in register)
Reviewed-by: default avatarArjan van de Ven <arjan@linux.intel.com>
Acked-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent cdd6c482
...@@ -4000,7 +4000,7 @@ S: Maintained ...@@ -4000,7 +4000,7 @@ S: Maintained
F: include/linux/delayacct.h F: include/linux/delayacct.h
F: kernel/delayacct.c F: kernel/delayacct.c
PERFORMANCE COUNTER SUBSYSTEM PERFORMANCE EVENTS SUBSYSTEM
M: Peter Zijlstra <a.p.zijlstra@chello.nl> M: Peter Zijlstra <a.p.zijlstra@chello.nl>
M: Paul Mackerras <paulus@samba.org> M: Paul Mackerras <paulus@samba.org>
M: Ingo Molnar <mingo@elte.hu> M: Ingo Molnar <mingo@elte.hu>
......
...@@ -41,7 +41,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); ...@@ -41,7 +41,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
struct power_pmu *ppmu; struct power_pmu *ppmu;
/* /*
* Normally, to ignore kernel events we set the FCS (freeze events * Normally, to ignore kernel events we set the FCS (freeze counters
* in supervisor mode) bit in MMCR0, but if the kernel runs with the * in supervisor mode) bit in MMCR0, but if the kernel runs with the
* hypervisor bit set in the MSR, or if we are running on a processor * hypervisor bit set in the MSR, or if we are running on a processor
* where the hypervisor bit is forced to 1 (as on Apple G5 processors), * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
...@@ -159,7 +159,7 @@ void perf_event_print_debug(void) ...@@ -159,7 +159,7 @@ void perf_event_print_debug(void)
} }
/* /*
* Read one performance monitor event (PMC). * Read one performance monitor counter (PMC).
*/ */
static unsigned long read_pmc(int idx) static unsigned long read_pmc(int idx)
{ {
...@@ -409,7 +409,7 @@ static void power_pmu_read(struct perf_event *event) ...@@ -409,7 +409,7 @@ static void power_pmu_read(struct perf_event *event)
val = read_pmc(event->hw.idx); val = read_pmc(event->hw.idx);
} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
/* The events are only 32 bits wide */ /* The counters are only 32 bits wide */
delta = (val - prev) & 0xfffffffful; delta = (val - prev) & 0xfffffffful;
atomic64_add(delta, &event->count); atomic64_add(delta, &event->count);
atomic64_sub(delta, &event->hw.period_left); atomic64_sub(delta, &event->hw.period_left);
...@@ -543,7 +543,7 @@ void hw_perf_disable(void) ...@@ -543,7 +543,7 @@ void hw_perf_disable(void)
} }
/* /*
* Set the 'freeze events' bit. * Set the 'freeze counters' bit.
* The barrier is to make sure the mtspr has been * The barrier is to make sure the mtspr has been
* executed and the PMU has frozen the events * executed and the PMU has frozen the events
* before we return. * before we return.
...@@ -1124,7 +1124,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -1124,7 +1124,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
} }
/* /*
* A event has overflowed; update its count and record * A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled * things if requested. Note that interrupts are hard-disabled
* here so there is no possibility of being interrupted. * here so there is no possibility of being interrupted.
*/ */
...@@ -1271,7 +1271,7 @@ static void perf_event_interrupt(struct pt_regs *regs) ...@@ -1271,7 +1271,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
/* /*
* Reset MMCR0 to its normal value. This will set PMXE and * Reset MMCR0 to its normal value. This will set PMXE and
* clear FC (freeze events) and PMAO (perf mon alert occurred) * clear FC (freeze counters) and PMAO (perf mon alert occurred)
* and thus allow interrupts to occur again. * and thus allow interrupts to occur again.
* XXX might want to use MSR.PM to keep the events frozen until * XXX might want to use MSR.PM to keep the events frozen until
* we get back out of this interrupt. * we get back out of this interrupt.
......
...@@ -2083,7 +2083,7 @@ void __init init_hw_perf_events(void) ...@@ -2083,7 +2083,7 @@ void __init init_hw_perf_events(void)
pr_info("... version: %d\n", x86_pmu.version); pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.event_bits); pr_info("... bit width: %d\n", x86_pmu.event_bits);
pr_info("... generic events: %d\n", x86_pmu.num_events); pr_info("... generic registers: %d\n", x86_pmu.num_events);
pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period); pr_info("... max period: %016Lx\n", x86_pmu.max_period);
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
......
/*
* NOTE: this file will be removed in a future kernel release, it is
* provided as a courtesy copy of user-space code that relies on the
* old (pre-rename) symbols and constants.
*
* Performance events:
*
* Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
*
* Data type definitions, declarations, prototypes.
*
* Started by: Thomas Gleixner and Ingo Molnar
*
* For licencing details see kernel-base/COPYING
*/
#ifndef _LINUX_PERF_COUNTER_H
#define _LINUX_PERF_COUNTER_H
#include <linux/types.h>
#include <linux/ioctl.h>
#include <asm/byteorder.h>
/*
* User-space ABI bits:
*/
/*
* attr.type
*/
enum perf_type_id {
PERF_TYPE_HARDWARE = 0,
PERF_TYPE_SOFTWARE = 1,
PERF_TYPE_TRACEPOINT = 2,
PERF_TYPE_HW_CACHE = 3,
PERF_TYPE_RAW = 4,
PERF_TYPE_MAX, /* non-ABI */
};
/*
* Generalized performance counter event types, used by the
* attr.event_id parameter of the sys_perf_counter_open()
* syscall:
*/
enum perf_hw_id {
/*
* Common hardware events, generalized by the kernel:
*/
PERF_COUNT_HW_CPU_CYCLES = 0,
PERF_COUNT_HW_INSTRUCTIONS = 1,
PERF_COUNT_HW_CACHE_REFERENCES = 2,
PERF_COUNT_HW_CACHE_MISSES = 3,
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
PERF_COUNT_HW_MAX, /* non-ABI */
};
/*
* Generalized hardware cache counters:
*
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
* { read, write, prefetch } x
* { accesses, misses }
*/
enum perf_hw_cache_id {
PERF_COUNT_HW_CACHE_L1D = 0,
PERF_COUNT_HW_CACHE_L1I = 1,
PERF_COUNT_HW_CACHE_LL = 2,
PERF_COUNT_HW_CACHE_DTLB = 3,
PERF_COUNT_HW_CACHE_ITLB = 4,
PERF_COUNT_HW_CACHE_BPU = 5,
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};
enum perf_hw_cache_op_id {
PERF_COUNT_HW_CACHE_OP_READ = 0,
PERF_COUNT_HW_CACHE_OP_WRITE = 1,
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};
enum perf_hw_cache_op_result_id {
PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
/*
* Special "software" counters provided by the kernel, even if the hardware
* does not support performance counters. These counters measure various
* physical and sw events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
PERF_COUNT_SW_CPU_CLOCK = 0,
PERF_COUNT_SW_TASK_CLOCK = 1,
PERF_COUNT_SW_PAGE_FAULTS = 2,
PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_MAX, /* non-ABI */
};
/*
* Bits that can be set in attr.sample_type to request information
* in the overflow packets.
*/
enum perf_counter_sample_format {
PERF_SAMPLE_IP = 1U << 0,
PERF_SAMPLE_TID = 1U << 1,
PERF_SAMPLE_TIME = 1U << 2,
PERF_SAMPLE_ADDR = 1U << 3,
PERF_SAMPLE_READ = 1U << 4,
PERF_SAMPLE_CALLCHAIN = 1U << 5,
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
PERF_SAMPLE_PERIOD = 1U << 8,
PERF_SAMPLE_STREAM_ID = 1U << 9,
PERF_SAMPLE_RAW = 1U << 10,
PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
};
/*
* The format of the data returned by read() on a perf counter fd,
* as specified by attr.read_format:
*
* struct read_format {
* { u64 value;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 id; } && PERF_FORMAT_ID
* } && !PERF_FORMAT_GROUP
*
* { u64 nr;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 value;
* { u64 id; } && PERF_FORMAT_ID
* } cntr[nr];
* } && PERF_FORMAT_GROUP
* };
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
PERF_FORMAT_GROUP = 1U << 3,
PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
/*
* Hardware event to monitor via a performance monitoring counter:
*/
struct perf_counter_attr {
/*
* Major type: hardware/software/tracepoint/etc.
*/
__u32 type;
/*
* Size of the attr structure, for fwd/bwd compat.
*/
__u32 size;
/*
* Type specific configuration information.
*/
__u64 config;
union {
__u64 sample_period;
__u64 sample_freq;
};
__u64 sample_type;
__u64 read_format;
__u64 disabled : 1, /* off by default */
inherit : 1, /* children inherit it */
pinned : 1, /* must always be on PMU */
exclusive : 1, /* only group on PMU */
exclude_user : 1, /* don't count user */
exclude_kernel : 1, /* ditto kernel */
exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */
mmap : 1, /* include mmap data */
comm : 1, /* include comm data */
freq : 1, /* use freq, not period */
inherit_stat : 1, /* per task counts */
enable_on_exec : 1, /* next exec enables */
task : 1, /* trace fork/exit */
watermark : 1, /* wakeup_watermark */
__reserved_1 : 49;
union {
__u32 wakeup_events; /* wakeup every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
__u32 __reserved_2;
__u64 __reserved_3;
};
/*
* Ioctls that can be done on a perf counter fd:
*/
#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0)
#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1)
#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2)
#define PERF_COUNTER_IOC_RESET _IO ('$', 3)
#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64)
#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5)
enum perf_counter_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
* Structure of the page that can be mapped via mmap
*/
struct perf_counter_mmap_page {
__u32 version; /* version number of this structure */
__u32 compat_version; /* lowest version this is compat with */
/*
* Bits needed to read the hw counters in user-space.
*
* u32 seq;
* s64 count;
*
* do {
* seq = pc->lock;
*
* barrier()
* if (pc->index) {
* count = pmc_read(pc->index - 1);
* count += pc->offset;
* } else
* goto regular_read;
*
* barrier();
* } while (pc->lock != seq);
*
* NOTE: for obvious reason this only works on self-monitoring
* processes.
*/
__u32 lock; /* seqlock for synchronization */
__u32 index; /* hardware counter identifier */
__s64 offset; /* add to hardware counter value */
__u64 time_enabled; /* time counter active */
__u64 time_running; /* time counter on cpu */
/*
* Hole for extension of the self monitor capabilities
*/
__u64 __reserved[123]; /* align to 1k */
/*
* Control data for the mmap() data buffer.
*
* User-space reading the @data_head value should issue an rmb(), on
* SMP capable platforms, after reading this value -- see
* perf_counter_wakeup().
*
* When the mapping is PROT_WRITE the @data_tail value should be
* written by userspace to reflect the last read data. In this case
* the kernel will not over-write unread data.
*/
__u64 data_head; /* head in the data section */
__u64 data_tail; /* user-space written tail */
};
#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0)
#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_EVENT_MISC_KERNEL (1 << 0)
#define PERF_EVENT_MISC_USER (2 << 0)
#define PERF_EVENT_MISC_HYPERVISOR (3 << 0)
struct perf_event_header {
__u32 type;
__u16 misc;
__u16 size;
};
enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
* correlate userspace IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
*
* u32 pid, tid;
* u64 addr;
* u64 len;
* u64 pgoff;
* char filename[];
* };
*/
PERF_EVENT_MMAP = 1,
/*
* struct {
* struct perf_event_header header;
* u64 id;
* u64 lost;
* };
*/
PERF_EVENT_LOST = 2,
/*
* struct {
* struct perf_event_header header;
*
* u32 pid, tid;
* char comm[];
* };
*/
PERF_EVENT_COMM = 3,
/*
* struct {
* struct perf_event_header header;
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
* };
*/
PERF_EVENT_EXIT = 4,
/*
* struct {
* struct perf_event_header header;
* u64 time;
* u64 id;
* u64 stream_id;
* };
*/
PERF_EVENT_THROTTLE = 5,
PERF_EVENT_UNTHROTTLE = 6,
/*
* struct {
* struct perf_event_header header;
* u32 pid, ppid;
* u32 tid, ptid;
* { u64 time; } && PERF_SAMPLE_TIME
* };
*/
PERF_EVENT_FORK = 7,
/*
* struct {
* struct perf_event_header header;
* u32 pid, tid;
*
* struct read_format values;
* };
*/
PERF_EVENT_READ = 8,
/*
* struct {
* struct perf_event_header header;
*
* { u64 ip; } && PERF_SAMPLE_IP
* { u32 pid, tid; } && PERF_SAMPLE_TID
* { u64 time; } && PERF_SAMPLE_TIME
* { u64 addr; } && PERF_SAMPLE_ADDR
* { u64 id; } && PERF_SAMPLE_ID
* { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
* { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 period; } && PERF_SAMPLE_PERIOD
*
* { struct read_format values; } && PERF_SAMPLE_READ
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
*
* #
* # The RAW record below is opaque data wrt the ABI
* #
* # That is, the ABI doesn't make any promises wrt to
* # the stability of its content, it may vary depending
* # on event, hardware, kernel version and phase of
* # the moon.
* #
* # In other words, PERF_SAMPLE_RAW contents are not an ABI.
* #
*
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
* };
*/
PERF_EVENT_SAMPLE = 9,
PERF_EVENT_MAX, /* non-ABI */
};
enum perf_callchain_context {
PERF_CONTEXT_HV = (__u64)-32,
PERF_CONTEXT_KERNEL = (__u64)-128,
PERF_CONTEXT_USER = (__u64)-512,
PERF_CONTEXT_GUEST = (__u64)-2048,
PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
PERF_CONTEXT_GUEST_USER = (__u64)-2560,
PERF_CONTEXT_MAX = (__u64)-4095,
};
#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
/*
* In case some app still references the old symbols:
*/
#define __NR_perf_counter_open __NR_perf_event_open
#define PR_TASK_PERF_COUNTERS_DISABLE PR_TASK_PERF_EVENTS_DISABLE
#define PR_TASK_PERF_COUNTERS_ENABLE PR_TASK_PERF_EVENTS_ENABLE
#endif /* _LINUX_PERF_COUNTER_H */
...@@ -395,7 +395,7 @@ enum perf_event_type { ...@@ -395,7 +395,7 @@ enum perf_event_type {
* # * #
* # That is, the ABI doesn't make any promises wrt to * # That is, the ABI doesn't make any promises wrt to
* # the stability of its content, it may vary depending * # the stability of its content, it may vary depending
* # on event_id, hardware, kernel version and phase of * # on event, hardware, kernel version and phase of
* # the moon. * # the moon.
* # * #
* # In other words, PERF_SAMPLE_RAW contents are not an ABI. * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
......
...@@ -920,26 +920,31 @@ config HAVE_PERF_EVENTS ...@@ -920,26 +920,31 @@ config HAVE_PERF_EVENTS
help help
See tools/perf/design.txt for details. See tools/perf/design.txt for details.
menu "Performance Counters" menu "Kernel Performance Events And Counters"
config PERF_EVENTS config PERF_EVENTS
bool "Kernel Performance Counters" bool "Kernel performance events and counters"
default y if PROFILING default y if (PROFILING || PERF_COUNTERS)
depends on HAVE_PERF_EVENTS depends on HAVE_PERF_EVENTS
select ANON_INODES select ANON_INODES
help help
Enable kernel support for performance counter hardware. Enable kernel support for various performance events provided
by software and hardware.
Performance counters are special hardware registers available Software events are supported either build-in or via the
on most modern CPUs. These registers count the number of certain use of generic tracepoints.
Most modern CPUs support performance events via performance
counter registers. These registers count the number of certain
types of hw events: such as instructions executed, cachemisses types of hw events: such as instructions executed, cachemisses
suffered, or branches mis-predicted - without slowing down the suffered, or branches mis-predicted - without slowing down the
kernel or applications. These registers can also trigger interrupts kernel or applications. These registers can also trigger interrupts
when a threshold number of events have passed - and can thus be when a threshold number of events have passed - and can thus be
used to profile the code that runs on that CPU. used to profile the code that runs on that CPU.
The Linux Performance Counter subsystem provides an abstraction of The Linux Performance Event subsystem provides an abstraction of
these hardware capabilities, available via a system call. It these software and hardware cevent apabilities, available via a
system call and used by the "perf" utility in tools/perf/. It
provides per task and per CPU counters, and it provides event provides per task and per CPU counters, and it provides event
capabilities on top of those. capabilities on top of those.
...@@ -950,14 +955,26 @@ config EVENT_PROFILE ...@@ -950,14 +955,26 @@ config EVENT_PROFILE
depends on PERF_EVENTS && EVENT_TRACING depends on PERF_EVENTS && EVENT_TRACING
default y default y
help help
Allow the use of tracepoints as software performance counters. Allow the use of tracepoints as software performance events.
When this is enabled, you can create perf counters based on When this is enabled, you can create perf events based on
tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
found in debugfs://tracing/events/*/*/id. (The -e/--events found in debugfs://tracing/events/*/*/id. (The -e/--events
option to the perf tool can parse and interpret symbolic option to the perf tool can parse and interpret symbolic
tracepoints, in the subsystem:tracepoint_name format.) tracepoints, in the subsystem:tracepoint_name format.)
config PERF_COUNTERS
bool "Kernel performance counters (old config option)"
depends on HAVE_PERF_EVENTS
help
This config has been obsoleted by the PERF_EVENTS
config option - please see that one for details.
It has no effect on the kernel whether you enable
it or not, it is a compatibility placeholder.
Say N if unsure.
endmenu endmenu
config VM_EVENT_COUNTERS config VM_EVENT_COUNTERS
......
/* /*
* Performance event core code * Performance events core code:
* *
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment