Commit 58daf18c authored by Russell King's avatar Russell King

Merge branch 'clksrc' into devel

Conflicts:
	arch/arm/mach-vexpress/v2m.c
	arch/arm/plat-omap/counter_32k.c
	arch/arm/plat-versatile/Makefile
parents aa312be1 0af85dda
......@@ -14,6 +14,7 @@ config ARM
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
select HAVE_GENERIC_DMA_COHERENT
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
......@@ -38,6 +39,9 @@ config HAVE_PWM
config SYS_SUPPORTS_APM_EMULATION
bool
config HAVE_SCHED_CLOCK
bool
config GENERIC_GPIO
bool
......@@ -233,6 +237,7 @@ config ARCH_REALVIEW
bool "ARM Ltd. RealView family"
select ARM_AMBA
select COMMON_CLKDEV
select HAVE_SCHED_CLOCK
select ICST
select GENERIC_CLOCKEVENTS
select ARCH_WANT_OPTIONAL_GPIOLIB
......@@ -247,6 +252,7 @@ config ARCH_VERSATILE
select ARM_AMBA
select ARM_VIC
select COMMON_CLKDEV
select HAVE_SCHED_CLOCK
select ICST
select GENERIC_CLOCKEVENTS
select ARCH_WANT_OPTIONAL_GPIOLIB
......@@ -263,6 +269,7 @@ config ARCH_VEXPRESS
select COMMON_CLKDEV
select GENERIC_CLOCKEVENTS
select HAVE_CLK
select HAVE_SCHED_CLOCK
select ICST
select PLAT_VERSATILE
help
......@@ -434,6 +441,7 @@ config ARCH_IXP4XX
select CPU_XSCALE
select GENERIC_GPIO
select GENERIC_CLOCKEVENTS
select HAVE_SCHED_CLOCK
select DMABOUNCE if PCI
help
Support for Intel's IXP4XX (XScale) family of processors.
......@@ -509,6 +517,7 @@ config ARCH_MMP
select ARCH_REQUIRE_GPIOLIB
select COMMON_CLKDEV
select GENERIC_CLOCKEVENTS
select HAVE_SCHED_CLOCK
select TICK_ONESHOT
select PLAT_PXA
select SPARSE_IRQ
......@@ -565,6 +574,7 @@ config ARCH_TEGRA
select GENERIC_CLOCKEVENTS
select GENERIC_GPIO
select HAVE_CLK
select HAVE_SCHED_CLOCK
select COMMON_CLKDEV
select ARCH_HAS_BARRIERS if CACHE_L2X0
select ARCH_HAS_CPUFREQ
......@@ -588,6 +598,7 @@ config ARCH_PXA
select COMMON_CLKDEV
select ARCH_REQUIRE_GPIOLIB
select GENERIC_CLOCKEVENTS
select HAVE_SCHED_CLOCK
select TICK_ONESHOT
select PLAT_PXA
select SPARSE_IRQ
......@@ -636,6 +647,7 @@ config ARCH_SA1100
select CPU_FREQ
select GENERIC_CLOCKEVENTS
select HAVE_CLK
select HAVE_SCHED_CLOCK
select TICK_ONESHOT
select ARCH_REQUIRE_GPIOLIB
help
......@@ -782,6 +794,7 @@ config ARCH_U300
bool "ST-Ericsson U300 Series"
depends on MMU
select CPU_ARM926T
select HAVE_SCHED_CLOCK
select HAVE_TCM
select ARM_AMBA
select ARM_VIC
......@@ -830,6 +843,7 @@ config ARCH_OMAP
select ARCH_REQUIRE_GPIOLIB
select ARCH_HAS_CPUFREQ
select GENERIC_CLOCKEVENTS
select HAVE_SCHED_CLOCK
select ARCH_HAS_HOLES_MEMORYMODEL
help
Support for TI's OMAP platform (OMAP1/2/3/4).
......@@ -983,9 +997,11 @@ config ARCH_ACORN
config PLAT_IOP
bool
select GENERIC_CLOCKEVENTS
select HAVE_SCHED_CLOCK
config PLAT_ORION
bool
select HAVE_SCHED_CLOCK
config PLAT_PXA
bool
......@@ -1212,10 +1228,11 @@ config SMP
depends on EXPERIMENTAL
depends on GENERIC_CLOCKEVENTS
depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \
MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4
MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \
ARCH_MSM_SCORPIONMP
select USE_GENERIC_SMP_HELPERS
select HAVE_ARM_SCU
select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
......@@ -1290,6 +1307,7 @@ config NR_CPUS
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
depends on SMP && HOTPLUG && EXPERIMENTAL
depends on !ARCH_MSM
help
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
......@@ -1298,7 +1316,7 @@ config LOCAL_TIMERS
bool "Use local timer interrupts"
depends on SMP
default y
select HAVE_ARM_TWD
select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP
help
Enable support for local timers on SMP platforms, rather then the
legacy IPI broadcast method. Local timers allows the system
......
......@@ -23,7 +23,7 @@ config STRICT_DEVMEM
config FRAME_POINTER
bool
depends on !THUMB2_KERNEL
default y if !ARM_UNWIND
default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER
help
If you say N here, the resulting kernel will be slightly smaller and
faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled,
......
......@@ -44,7 +44,6 @@ static struct clocksource clocksource_sp804 = {
.rating = 200,
.read = sp804_read,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -61,8 +60,7 @@ void __init sp804_clocksource_init(void __iomem *base)
writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
clksrc_base + TIMER_CTRL);
cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift);
clocksource_register(cs);
clocksource_register_khz(cs, TIMER_FREQ_KHZ);
}
......
/*
* sched_clock.h: support for extending counters to full 64-bit ns counter
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef ASM_SCHED_CLOCK
#define ASM_SCHED_CLOCK
#include <linux/kernel.h>
#include <linux/types.h>
struct clock_data {
u64 epoch_ns;
u32 epoch_cyc;
u32 epoch_cyc_copy;
u32 mult;
u32 shift;
};
#define DEFINE_CLOCK_DATA(name) struct clock_data name
static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
return (cyc * mult) >> shift;
}
/*
* Atomically update the sched_clock epoch. Your update callback will
* be called from a timer before the counter wraps - read the current
* counter value, and call this function to safely move the epochs
* forward. Only use this from the update callback.
*/
static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask)
{
unsigned long flags;
u64 ns = cd->epoch_ns +
cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift);
/*
* Write epoch_cyc and epoch_ns in a way that the update is
* detectable in cyc_to_fixed_sched_clock().
*/
raw_local_irq_save(flags);
cd->epoch_cyc = cyc;
smp_wmb();
cd->epoch_ns = ns;
smp_wmb();
cd->epoch_cyc_copy = cyc;
raw_local_irq_restore(flags);
}
/*
* If your clock rate is known at compile time, using this will allow
* you to optimize the mult/shift loads away. This is paired with
* init_fixed_sched_clock() to ensure that your mult/shift are correct.
*/
static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd,
u32 cyc, u32 mask, u32 mult, u32 shift)
{
u64 epoch_ns;
u32 epoch_cyc;
/*
* Load the epoch_cyc and epoch_ns atomically. We do this by
* ensuring that we always write epoch_cyc, epoch_ns and
* epoch_cyc_copy in strict order, and read them in strict order.
* If epoch_cyc and epoch_cyc_copy are not equal, then we're in
* the middle of an update, and we should repeat the load.
*/
do {
epoch_cyc = cd->epoch_cyc;
smp_rmb();
epoch_ns = cd->epoch_ns;
smp_rmb();
} while (epoch_cyc != cd->epoch_cyc_copy);
return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift);
}
/*
* Otherwise, you need to use this, which will obtain the mult/shift
* from the clock_data structure. Use init_sched_clock() with this.
*/
static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd,
u32 cyc, u32 mask)
{
return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift);
}
/*
* Initialize the clock data - calculate the appropriate multiplier
* and shift. Also setup a timer to ensure that the epoch is refreshed
* at the appropriate time interval, which will call your update
* handler.
*/
void init_sched_clock(struct clock_data *, void (*)(void),
unsigned int, unsigned long);
/*
* Use this initialization function rather than init_sched_clock() if
* you're using cyc_to_fixed_sched_clock, which will warn if your
* constants are incorrect.
*/
static inline void init_fixed_sched_clock(struct clock_data *cd,
void (*update)(void), unsigned int bits, unsigned long rate,
u32 mult, u32 shift)
{
init_sched_clock(cd, update, bits, rate);
if (cd->mult != mult || cd->shift != shift) {
pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n"
"sched_clock: fix multiply/shift to avoid scheduler hiccups\n",
mult, shift, cd->mult, cd->shift);
}
}
#endif
......@@ -63,6 +63,11 @@
#include <asm/outercache.h>
#define __exception __attribute__((section(".exception.text")))
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
#else
#define __exception_irq_entry __exception
#endif
struct thread_info;
struct task_struct;
......
......@@ -15,13 +15,32 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
extern char __irqentry_text_start[];
extern char __irqentry_text_end[];
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
#else
static inline int __in_irqentry_text(unsigned long ptr)
{
return 0;
}
#endif
static inline int in_exception_text(unsigned long ptr)
{
extern char __exception_text_start[];
extern char __exception_text_end[];
int in;
in = ptr >= (unsigned long)&__exception_text_start &&
ptr < (unsigned long)&__exception_text_end;
return ptr >= (unsigned long)&__exception_text_start &&
ptr < (unsigned long)&__exception_text_end;
return in ? : __in_irqentry_text(ptr);
}
extern void __init early_trap_init(void);
......
......@@ -5,7 +5,7 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
ifdef CONFIG_DYNAMIC_FTRACE
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = -pg
endif
......@@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o
obj-$(CONFIG_ARTHUR) += arthur.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o
obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
obj-$(CONFIG_ATAGS_PROC) += atags.o
......
......@@ -147,98 +147,170 @@ ENDPROC(ret_from_fork)
#endif
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(__gnu_mcount_nc)
mov ip, lr
ldmia sp!, {lr}
mov pc, ip
ENDPROC(__gnu_mcount_nc)
.macro __mcount suffix
mcount_enter
ldr r0, =ftrace_trace_function
ldr r2, [r0]
adr r0, .Lftrace_stub
cmp r0, r2
bne 1f
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ldr r1, =ftrace_graph_return
ldr r2, [r1]
cmp r0, r2
bne ftrace_graph_caller\suffix
ldr r1, =ftrace_graph_entry
ldr r2, [r1]
ldr r0, =ftrace_graph_entry_stub
cmp r0, r2
bne ftrace_graph_caller\suffix
#endif
ENTRY(ftrace_caller)
stmdb sp!, {r0-r3, lr}
mov r0, lr
mcount_exit
1: mcount_get_lr r1 @ lr of instrumented func
mov r0, lr @ instrumented function
sub r0, r0, #MCOUNT_INSN_SIZE
adr lr, BSYM(2f)
mov pc, r2
2: mcount_exit
.endm
.macro __ftrace_caller suffix
mcount_enter
mcount_get_lr r1 @ lr of instrumented func
mov r0, lr @ instrumented function
sub r0, r0, #MCOUNT_INSN_SIZE
ldr r1, [sp, #20]
.global ftrace_call
ftrace_call:
.globl ftrace_call\suffix
ftrace_call\suffix:
bl ftrace_stub
ldmia sp!, {r0-r3, ip, lr}
mov pc, ip
ENDPROC(ftrace_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call\suffix
ftrace_graph_call\suffix:
mov r0, r0
#endif
mcount_exit
.endm
.macro __ftrace_graph_caller
sub r0, fp, #4 @ &lr of instrumented routine (&parent)
#ifdef CONFIG_DYNAMIC_FTRACE
@ called from __ftrace_caller, saved in mcount_enter
ldr r1, [sp, #16] @ instrumented routine (func)
#else
@ called from __mcount, untouched in lr
mov r1, lr @ instrumented routine (func)
#endif
sub r1, r1, #MCOUNT_INSN_SIZE
mov r2, fp @ frame pointer
bl prepare_ftrace_return
mcount_exit
.endm
#ifdef CONFIG_OLD_MCOUNT
/*
* mcount
*/
.macro mcount_enter
stmdb sp!, {r0-r3, lr}
.endm
.macro mcount_get_lr reg
ldr \reg, [fp, #-4]
.endm
.macro mcount_exit
ldr lr, [fp, #-4]
ldmia sp!, {r0-r3, pc}
.endm
ENTRY(mcount)
#ifdef CONFIG_DYNAMIC_FTRACE
stmdb sp!, {lr}
ldr lr, [fp, #-4]
ldmia sp!, {pc}
#else
__mcount _old
#endif
ENDPROC(mcount)
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(ftrace_caller_old)
stmdb sp!, {r0-r3, lr}
ldr r1, [fp, #-4]
mov r0, lr
sub r0, r0, #MCOUNT_INSN_SIZE
.globl ftrace_call_old
ftrace_call_old:
bl ftrace_stub
ldr lr, [fp, #-4] @ restore lr
ldmia sp!, {r0-r3, pc}
__ftrace_caller _old
ENDPROC(ftrace_caller_old)
#endif
#else
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller_old)
__ftrace_graph_caller
ENDPROC(ftrace_graph_caller_old)
#endif
ENTRY(__gnu_mcount_nc)
.purgem mcount_enter
.purgem mcount_get_lr
.purgem mcount_exit
#endif
/*
* __gnu_mcount_nc
*/
.macro mcount_enter
stmdb sp!, {r0-r3, lr}
ldr r0, =ftrace_trace_function
ldr r2, [r0]
adr r0, .Lftrace_stub
cmp r0, r2
bne gnu_trace
.endm
.macro mcount_get_lr reg
ldr \reg, [sp, #20]
.endm
.macro mcount_exit
ldmia sp!, {r0-r3, ip, lr}
mov pc, ip
.endm
gnu_trace:
ldr r1, [sp, #20] @ lr of instrumented routine
mov r0, lr
sub r0, r0, #MCOUNT_INSN_SIZE
adr lr, BSYM(1f)
mov pc, r2
1:
ldmia sp!, {r0-r3, ip, lr}
ENTRY(__gnu_mcount_nc)
#ifdef CONFIG_DYNAMIC_FTRACE
mov ip, lr
ldmia sp!, {lr}
mov pc, ip
#else
__mcount
#endif
ENDPROC(__gnu_mcount_nc)
#ifdef CONFIG_OLD_MCOUNT
/*
* This is under an ifdef in order to force link-time errors for people trying
* to build with !FRAME_POINTER with a GCC which doesn't use the new-style
* mcount.
*/
ENTRY(mcount)
stmdb sp!, {r0-r3, lr}
ldr r0, =ftrace_trace_function
ldr r2, [r0]
adr r0, ftrace_stub
cmp r0, r2
bne trace
ldr lr, [fp, #-4] @ restore lr
ldmia sp!, {r0-r3, pc}
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(ftrace_caller)
__ftrace_caller
ENDPROC(ftrace_caller)
#endif
trace:
ldr r1, [fp, #-4] @ lr of instrumented routine
mov r0, lr
sub r0, r0, #MCOUNT_INSN_SIZE
mov lr, pc
mov pc, r2
ldr lr, [fp, #-4] @ restore lr
ldmia sp!, {r0-r3, pc}
ENDPROC(mcount)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
__ftrace_graph_caller
ENDPROC(ftrace_graph_caller)
#endif
#endif /* CONFIG_DYNAMIC_FTRACE */
.purgem mcount_enter
.purgem mcount_get_lr
.purgem mcount_exit
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl return_to_handler
return_to_handler:
stmdb sp!, {r0-r3}
mov r0, fp @ frame pointer
bl ftrace_return_to_handler
mov lr, r0 @ r0 has real ret addr
ldmia sp!, {r0-r3}
mov pc, lr
#endif
ENTRY(ftrace_stub)
.Lftrace_stub:
......
......@@ -24,6 +24,7 @@
#define NOP 0xe8bd4000 /* pop {lr} */
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
#ifdef CONFIG_OLD_MCOUNT
#define OLD_MCOUNT_ADDR ((unsigned long) mcount)
#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
......@@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
}
#endif
/* construct a branch (BL) instruction to addr */
#ifdef CONFIG_THUMB2_KERNEL
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
bool link)
{
unsigned long s, j1, j2, i1, i2, imm10, imm11;
unsigned long first, second;
......@@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
j2 = (!i2) ^ s;
first = 0xf000 | (s << 10) | imm10;
second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11;
second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
if (link)
second |= 1 << 14;
return (second << 16) | first;
}
#else
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
bool link)
{
unsigned long opcode = 0xea000000;
long offset;
if (link)
opcode |= 1 << 24;
offset = (long)addr - (long)(pc + 8);
if (unlikely(offset < -33554432 || offset > 33554428)) {
/* Can't generate branches that far (from ARM ARM). Ftrace
......@@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
offset = (offset >> 2) & 0x00ffffff;
return 0xeb000000 | offset;
return opcode | offset;
}
#endif
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
{
return ftrace_gen_branch(pc, addr, true);
}
static int ftrace_modify_code(unsigned long pc, unsigned long old,
unsigned long new)
{
......@@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long) &return_to_handler;
struct ftrace_graph_ent trace;
unsigned long old;
int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
old = *parent;
*parent = return_hooker;
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer);
if (err == -EBUSY) {
*parent = old;
return;
}
trace.func = self_addr;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
current->curr_ret_stack--;
*parent = old;
}
}
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_graph_call;
extern unsigned long ftrace_graph_call_old;
extern void ftrace_graph_caller_old(void);
static int __ftrace_modify_caller(unsigned long *callsite,
void (*func) (void), bool enable)
{
unsigned long caller_fn = (unsigned long) func;
unsigned long pc = (unsigned long) callsite;
unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
unsigned long nop = 0xe1a00000; /* mov r0, r0 */
unsigned long old = enable ? nop : branch;
unsigned long new = enable ? branch : nop;
return ftrace_modify_code(pc, old, new);
}
static int ftrace_modify_graph_caller(bool enable)
{
int ret;
ret = __ftrace_modify_caller(&ftrace_graph_call,
ftrace_graph_caller,
enable);
#ifdef CONFIG_OLD_MCOUNT
if (!ret)
ret = __ftrace_modify_caller(&ftrace_graph_call_old,
ftrace_graph_caller_old,
enable);
#endif
return ret;
}
int ftrace_enable_ftrace_graph_caller(void)
{
return ftrace_modify_graph_caller(true);
}
int ftrace_disable_ftrace_graph_caller(void)
{
return ftrace_modify_graph_caller(false);
}
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
......@@ -35,6 +35,7 @@
#include <linux/list.h>
#include <linux/kallsyms.h>
#include <linux/proc_fs.h>
#include <linux/ftrace.h>
#include <asm/system.h>
#include <asm/mach/irq.h>
......@@ -105,7 +106,8 @@ int show_interrupts(struct seq_file *p, void *v)
* come via this function. Instead, they should provide their
* own 'handler'
*/
asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
asmlinkage void __exception_irq_entry
asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
......
......@@ -4,9 +4,7 @@
* ARM performance counter support.
*
* Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
*
* ARMv7 support: Jean Pihet <jpihet@mvista.com>
* 2010 (c) MontaVista Software, LLC.
* Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
*
* This code is based on the sparc64 perf event code, which is in turn based
* on the x86 code. Callchain code is based on the ARM OProfile backtrace
......@@ -69,29 +67,23 @@ struct cpu_hw_events {
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
/* PMU names. */
static const char *arm_pmu_names[] = {
[ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
[ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
[ARM_PERF_PMU_ID_V6] = "v6",
[ARM_PERF_PMU_ID_V6MP] = "v6mpcore",
[ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8",
[ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9",
};
struct arm_pmu {
enum arm_perf_pmu_ids id;
const char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct hw_perf_event *evt, int idx);
void (*disable)(struct hw_perf_event *evt, int idx);
int (*event_map)(int evt);
u64 (*raw_event)(u64);
int (*get_event_idx)(struct cpu_hw_events *cpuc,
struct hw_perf_event *hwc);
u32 (*read_counter)(int idx);
void (*write_counter)(int idx, u32 val);
void (*start)(void);
void (*stop)(void);
const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
const unsigned (*event_map)[PERF_COUNT_HW_MAX];
u32 raw_event_mask;
int num_events;
u64 max_period;
};
......@@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
#define CACHE_OP_UNSUPPORTED 0xFFFF
static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
static int
armpmu_map_cache_event(u64 config)
{
......@@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config)
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
if (ret == CACHE_OP_UNSUPPORTED)
return -ENOENT;
......@@ -165,6 +153,19 @@ armpmu_map_cache_event(u64 config)
return ret;
}
static int
armpmu_map_event(u64 config)
{
int mapping = (*armpmu->event_map)[config];
return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
}
static int
armpmu_map_raw_event(u64 config)
{
return (int)(config & armpmu->raw_event_mask);
}
static int
armpmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
......@@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event)
/* Decode the generic type into an ARM event identifier. */
if (PERF_TYPE_HARDWARE == event->attr.type) {
mapping = armpmu->event_map(event->attr.config);
mapping = armpmu_map_event(event->attr.config);
} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
mapping = armpmu_map_cache_event(event->attr.config);
} else if (PERF_TYPE_RAW == event->attr.type) {
mapping = armpmu->raw_event(event->attr.config);
mapping = armpmu_map_raw_event(event->attr.config);
} else {
pr_debug("event type %x not supported\n", event->attr.type);
return -EOPNOTSUPP;
......@@ -603,2366 +604,10 @@ static struct pmu pmu = {
.read = armpmu_read,
};
/*
* ARMv6 Performance counter handling code.
*
* ARMv6 has 2 configurable performance counters and a single cycle counter.
* They all share a single reset bit but can be written to zero so we can use
* that for a reset.
*
* The counters can't be individually enabled or disabled so when we remove
* one event and replace it with another we could get spurious counts from the
* wrong event. However, we can take advantage of the fact that the
* performance counters can export events to the event bus, and the event bus
* itself can be monitored. This requires that we *don't* export the events to
* the event bus. The procedure for disabling a configurable counter is:
* - change the counter to count the ETMEXTOUT[0] signal (0x20). This
* effectively stops the counter from counting.
* - disable the counter's interrupt generation (each counter has it's
* own interrupt enable bit).
* Once stopped, the counter value can be written as 0 to reset.
*
* To enable a counter:
* - enable the counter's interrupt generation.
* - set the new event type.
*
* Note: the dedicated cycle counter only counts cycles and can't be
* enabled/disabled independently of the others. When we want to disable the
* cycle counter, we have to just disable the interrupt reporting and start
* ignoring that counter. When re-enabling, we have to reset the value and
* enable the interrupt.
*/
enum armv6_perf_types {
ARMV6_PERFCTR_ICACHE_MISS = 0x0,
ARMV6_PERFCTR_IBUF_STALL = 0x1,
ARMV6_PERFCTR_DDEP_STALL = 0x2,
ARMV6_PERFCTR_ITLB_MISS = 0x3,
ARMV6_PERFCTR_DTLB_MISS = 0x4,
ARMV6_PERFCTR_BR_EXEC = 0x5,
ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
ARMV6_PERFCTR_INSTR_EXEC = 0x7,
ARMV6_PERFCTR_DCACHE_HIT = 0x9,
ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
ARMV6_PERFCTR_DCACHE_MISS = 0xB,
ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
ARMV6_PERFCTR_NOP = 0x20,
};
enum armv6_counters {
ARMV6_CYCLE_COUNTER = 1,
ARMV6_COUNTER0,
ARMV6_COUNTER1,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
enum armv6mpcore_perf_types {
ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
static inline unsigned long
armv6_pmcr_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
return val;
}
static inline void
armv6_pmcr_write(unsigned long val)
{
asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
}
#define ARMV6_PMCR_ENABLE (1 << 0)
#define ARMV6_PMCR_CTR01_RESET (1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
#define ARMV6_PMCR_OVERFLOWED_MASK \
(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
ARMV6_PMCR_CCOUNT_OVERFLOW)
static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)
{
return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
}
static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
enum armv6_counters counter)
{
int ret = 0;
if (ARMV6_CYCLE_COUNTER == counter)
ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
else if (ARMV6_COUNTER0 == counter)
ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
else if (ARMV6_COUNTER1 == counter)
ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
return ret;
}
static inline u32
armv6pmu_read_counter(int counter)
{
unsigned long value = 0;
if (ARMV6_CYCLE_COUNTER == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
else if (ARMV6_COUNTER0 == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
else if (ARMV6_COUNTER1 == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
return value;
}
static inline void
armv6pmu_write_counter(int counter,
u32 value)
{
if (ARMV6_CYCLE_COUNTER == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
else if (ARMV6_COUNTER0 == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
else if (ARMV6_COUNTER1 == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
void
armv6pmu_enable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, evt, flags;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = 0;
evt = ARMV6_PMCR_CCOUNT_IEN;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_EVT_COUNT0_MASK;
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
ARMV6_PMCR_COUNT0_IEN;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_EVT_COUNT1_MASK;
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
ARMV6_PMCR_COUNT1_IEN;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Mask out the current event and set the counter to count the event
* that we're interested in.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static irqreturn_t
armv6pmu_handle_irq(int irq_num,
void *dev)
{
unsigned long pmcr = armv6_pmcr_read();
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
if (!armv6_pmcr_has_overflowed(pmcr))
return IRQ_NONE;
regs = get_irq_regs();
/*
* The interrupts are cleared by writing the overflow flags back to
* the control register. All of the other bits don't have any effect
* if they are rewritten, so write the whole value back.
*/
armv6_pmcr_write(pmcr);
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
/*
* We have a single interrupt for all counters. Check that
* each counter has overflowed before we process it.
*/
if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts disabled. For
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
irq_work_run();
return IRQ_HANDLED;
}
static void
armv6pmu_start(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val |= ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
void
armv6pmu_stop(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline int
armv6pmu_event_map(int config)
{
int mapping = armv6_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static inline int
armv6mpcore_pmu_event_map(int config)
{
int mapping = armv6mpcore_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static u64
armv6pmu_raw_event(u64 config)
{
return config & 0xff;
}
static int
armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
/* Always place a cycle counter into the cycle counter. */
if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return ARMV6_CYCLE_COUNTER;
} else {
/*
* For anything other than a cycle counter, try and use
* counter0 and counter1.
*/
if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
return ARMV6_COUNTER1;
}
if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
return ARMV6_COUNTER0;
}
/* The counters are all in use. */
return -EAGAIN;
}
}
static void
armv6pmu_disable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, evt, flags;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = ARMV6_PMCR_CCOUNT_IEN;
evt = 0;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Mask out the current event and set the counter to count the number
* of ETM bus signal assertion cycles. The external reporting should
* be disabled and so this should never increment.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, flags, evt = 0;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = ARMV6_PMCR_CCOUNT_IEN;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_COUNT0_IEN;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_COUNT1_IEN;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Unlike UP ARMv6, we don't have a way of stopping the counters. We
* simply disable the interrupt reporting.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static const struct arm_pmu armv6pmu = {
.id = ARM_PERF_PMU_ID_V6,
.handle_irq = armv6pmu_handle_irq,
.enable = armv6pmu_enable_event,
.disable = armv6pmu_disable_event,
.event_map = armv6pmu_event_map,
.raw_event = armv6pmu_raw_event,
.read_counter = armv6pmu_read_counter,
.write_counter = armv6pmu_write_counter,
.get_event_idx = armv6pmu_get_event_idx,
.start = armv6pmu_start,
.stop = armv6pmu_stop,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
/*
* ARMv6mpcore is almost identical to single core ARMv6 with the exception
* that some of the events have different enumerations and that there is no
* *hack* to stop the programmable counters. To stop the counters we simply
* disable the interrupt reporting and update the event. When unthrottling we
* reset the period and enable the interrupt reporting.
*/
static const struct arm_pmu armv6mpcore_pmu = {
.id = ARM_PERF_PMU_ID_V6MP,
.handle_irq = armv6pmu_handle_irq,
.enable = armv6pmu_enable_event,
.disable = armv6mpcore_pmu_disable_event,
.event_map = armv6mpcore_pmu_event_map,
.raw_event = armv6pmu_raw_event,
.read_counter = armv6pmu_read_counter,
.write_counter = armv6pmu_write_counter,
.get_event_idx = armv6pmu_get_event_idx,
.start = armv6pmu_start,
.stop = armv6pmu_stop,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
/*
* ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
*
* Copied from ARMv6 code, with the low level code inspired
* by the ARMv7 Oprofile code.
*
* Cortex-A8 has up to 4 configurable performance counters and
* a single cycle counter.
* Cortex-A9 has up to 31 configurable performance counters and
* a single cycle counter.
*
* All counters can be enabled/disabled and IRQ masked separately. The cycle
* counter and all 4 performance counters together can be reset separately.
*/
/* Common ARMv7 event types */
enum armv7_perf_types {
ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
ARMV7_PERFCTR_IFETCH_MISS = 0x01,
ARMV7_PERFCTR_ITLB_MISS = 0x02,
ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
ARMV7_PERFCTR_DTLB_REFILL = 0x05,
ARMV7_PERFCTR_DREAD = 0x06,
ARMV7_PERFCTR_DWRITE = 0x07,
ARMV7_PERFCTR_EXC_TAKEN = 0x09,
ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
ARMV7_PERFCTR_CID_WRITE = 0x0B,
/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
* It counts:
* - all branch instructions,
* - instructions that explicitly write the PC,
* - exception generating instructions.
*/
ARMV7_PERFCTR_PC_WRITE = 0x0C,
ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
ARMV7_PERFCTR_CPU_CYCLES = 0xFF
};
/* ARMv7 Cortex-A8 specific event types */
enum armv7_a8_perf_types {
ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
ARMV7_PERFCTR_L2_ACCESS = 0x43,
ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
ARMV7_PERFCTR_L2_NEON = 0x4E,
ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
ARMV7_PERFCTR_L1_INST = 0x50,
ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
ARMV7_PERFCTR_OP_EXECUTED = 0x55,
ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
ARMV7_PERFCTR_CYCLES_INST = 0x57,
ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
ARMV7_PERFCTR_PMU_EVENTS = 0x72,
};
/* ARMv7 Cortex-A9 specific event types */
enum armv7_a9_perf_types {
ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
ARMV7_PERFCTR_DATA_EVICTION = 0x65,
ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
ARMV7_PERFCTR_ISB_INST = 0x90,
ARMV7_PERFCTR_DSB_INST = 0x91,
ARMV7_PERFCTR_DMB_INST = 0x92,
ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
};
/*
* Cortex-A8 HW events mapping
*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
/*
* Cortex-A9 HW events mapping
*/
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] =
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
/*
* Perf Events counters
*/
enum armv7_counters {
ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
ARMV7_COUNTER0 = 2, /* First event counter */
};
/*
* The cycle counter is ARMV7_CYCLE_COUNTER.
* The first event counter is ARMV7_COUNTER0.
* The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
*/
#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
/*
* ARMv7 low level PMNC access
*/
/*
* Per-CPU PMNC: config reg
*/
#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
#define ARMV7_PMNC_N_MASK 0x1f
#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
/*
* Available counters
*/
#define ARMV7_CNT0 0 /* First event counter */
#define ARMV7_CCNT 31 /* Cycle counter */
/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
/*
* CNTENS: counters enable reg
*/
#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
/*
* CNTENC: counters disable reg
*/
#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
/*
* INTENS: counters overflow interrupt enable reg
*/
#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
/*
* INTENC: counters overflow interrupt disable reg
*/
#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
/*
* EVTSEL: Event selection reg
*/
#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
/*
* SELECT: Counter selection reg
*/
#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
/*
* FLAG: counters overflow flag status reg
*/
#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
static inline unsigned long armv7_pmnc_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
return val;
}
static inline void armv7_pmnc_write(unsigned long val)
{
val &= ARMV7_PMNC_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
}
static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
{
return pmnc & ARMV7_OVERFLOWED_MASK;
}
static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
enum armv7_counters counter)
{
int ret = 0;
if (counter == ARMV7_CYCLE_COUNTER)
ret = pmnc & ARMV7_FLAG_C;
else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
ret = pmnc & ARMV7_FLAG_P(counter);
else
pr_err("CPU%u checking wrong counter %d overflow status\n",
smp_processor_id(), counter);
return ret;
}
static inline int armv7_pmnc_select_counter(unsigned int idx)
{
u32 val;
if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
pr_err("CPU%u selecting wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
return idx;
}
static inline u32 armv7pmu_read_counter(int idx)
{
unsigned long value = 0;
if (idx == ARMV7_CYCLE_COUNTER)
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
if (armv7_pmnc_select_counter(idx) == idx)
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r" (value));
} else
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
return value;
}
static inline void armv7pmu_write_counter(int idx, u32 value)
{
if (idx == ARMV7_CYCLE_COUNTER)
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
if (armv7_pmnc_select_counter(idx) == idx)
asm volatile("mcr p15, 0, %0, c9, c13, 2"
: : "r" (value));
} else
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
}
static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
{
if (armv7_pmnc_select_counter(idx) == idx) {
val &= ARMV7_EVTSEL_MASK;
asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
}
}
static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u enabling wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_CNTENS_C;
else
val = ARMV7_CNTENS_P(idx);
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u disabling wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_CNTENC_C;
else
val = ARMV7_CNTENC_P(idx);
asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u enabling wrong PMNC counter"
" interrupt enable %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_INTENS_C;
else
val = ARMV7_INTENS_P(idx);
asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u disabling wrong PMNC counter"
" interrupt enable %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_INTENC_C;
else
val = ARMV7_INTENC_P(idx);
asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_getreset_flags(void)
{
u32 val;
/* Read */
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
/* Write to clear flags */
val &= ARMV7_FLAG_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
return val;
}
#ifdef DEBUG
static void armv7_pmnc_dump_regs(void)
{
u32 val;
unsigned int cnt;
printk(KERN_INFO "PMNC registers dump:\n");
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
printk(KERN_INFO "PMNC =0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
printk(KERN_INFO "CNTENS=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
printk(KERN_INFO "INTENS=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
printk(KERN_INFO "FLAGS =0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
printk(KERN_INFO "SELECT=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
printk(KERN_INFO "CCNT =0x%08x\n", val);
for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
armv7_pmnc_select_counter(cnt);
asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
printk(KERN_INFO "CNT[%d] count =0x%08x\n",
cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
}
}
#endif
void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags;
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
spin_lock_irqsave(&pmu_lock, flags);
/*
* Disable counter
*/
armv7_pmnc_disable_counter(idx);
/*
* Set event (if destined for PMNx counters)
* We don't need to set the event if it's a cycle count
*/
if (idx != ARMV7_CYCLE_COUNTER)
armv7_pmnc_write_evtsel(idx, hwc->config_base);
/*
* Enable interrupt for this counter
*/
armv7_pmnc_enable_intens(idx);
/*
* Enable counter
*/
armv7_pmnc_enable_counter(idx);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags;
/*
* Disable counter and interrupt
*/
spin_lock_irqsave(&pmu_lock, flags);
/*
* Disable counter
*/
armv7_pmnc_disable_counter(idx);
/*
* Disable interrupt for this counter
*/
armv7_pmnc_disable_intens(idx);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/*
* Get and reset the IRQ flags
*/
pmnc = armv7_pmnc_getreset_flags();
/*
* Did an overflow occur?
*/
if (!armv7_pmnc_has_overflowed(pmnc))
return IRQ_NONE;
/*
* Handle the counter(s) overflow(s)
*/
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
/*
* We have a single interrupt for all counters. Check that
* each counter has overflowed before we process it.
*/
if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts disabled. For
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
irq_work_run();
return IRQ_HANDLED;
}
static void armv7pmu_start(void)
{
unsigned long flags;
spin_lock_irqsave(&pmu_lock, flags);
/* Enable all counters */
armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void armv7pmu_stop(void)
{
unsigned long flags;
spin_lock_irqsave(&pmu_lock, flags);
/* Disable all counters */
armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline int armv7_a8_pmu_event_map(int config)
{
int mapping = armv7_a8_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static inline int armv7_a9_pmu_event_map(int config)
{
int mapping = armv7_a9_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static u64 armv7pmu_raw_event(u64 config)
{
return config & 0xff;
}
static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
int idx;
/* Always place a cycle counter into the cycle counter. */
if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return ARMV7_CYCLE_COUNTER;
} else {
/*
* For anything other than a cycle counter, try and use
* the events counters
*/
for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
if (!test_and_set_bit(idx, cpuc->used_mask))
return idx;
}
/* The counters are all in use. */
return -EAGAIN;
}
}
static struct arm_pmu armv7pmu = {
.handle_irq = armv7pmu_handle_irq,
.enable = armv7pmu_enable_event,
.disable = armv7pmu_disable_event,
.raw_event = armv7pmu_raw_event,
.read_counter = armv7pmu_read_counter,
.write_counter = armv7pmu_write_counter,
.get_event_idx = armv7pmu_get_event_idx,
.start = armv7pmu_start,
.stop = armv7pmu_stop,
.max_period = (1LLU << 32) - 1,
};
static u32 __init armv7_reset_read_pmnc(void)
{
u32 nb_cnt;
/* Initialize & Reset PMNC: C and P bits */
armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
/* Read the nb of CNTx counters supported from PMNC */
nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
/* Add the CPU cycles counter and return */
return nb_cnt + 1;
}
/*
* ARMv5 [xscale] Performance counter handling code.
*
* Based on xscale OProfile code.
*
* There are two variants of the xscale PMU that we support:
* - xscale1pmu: 2 event counters and a cycle counter
* - xscale2pmu: 4 event counters and a cycle counter
* The two variants share event definitions, but have different
* PMU structures.
*/
enum xscale_perf_types {
XSCALE_PERFCTR_ICACHE_MISS = 0x00,
XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
XSCALE_PERFCTR_DATA_STALL = 0x02,
XSCALE_PERFCTR_ITLB_MISS = 0x03,
XSCALE_PERFCTR_DTLB_MISS = 0x04,
XSCALE_PERFCTR_BRANCH = 0x05,
XSCALE_PERFCTR_BRANCH_MISS = 0x06,
XSCALE_PERFCTR_INSTRUCTION = 0x07,
XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
XSCALE_PERFCTR_PC_CHANGED = 0x0D,
XSCALE_PERFCTR_BCU_REQUEST = 0x10,
XSCALE_PERFCTR_BCU_FULL = 0x11,
XSCALE_PERFCTR_BCU_DRAIN = 0x12,
XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
XSCALE_PERFCTR_RMW = 0x16,
/* XSCALE_PERFCTR_CCNT is not hardware defined */
XSCALE_PERFCTR_CCNT = 0xFE,
XSCALE_PERFCTR_UNUSED = 0xFF,
};
enum xscale_counters {
XSCALE_CYCLE_COUNTER = 1,
XSCALE_COUNTER0,
XSCALE_COUNTER1,
XSCALE_COUNTER2,
XSCALE_COUNTER3,
};
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
#define XSCALE_PMU_ENABLE 0x001
#define XSCALE_PMN_RESET 0x002
#define XSCALE_CCNT_RESET 0x004
#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
#define XSCALE_PMU_CNT64 0x008
static inline int
xscalepmu_event_map(int config)
{
int mapping = xscale_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static u64
xscalepmu_raw_event(u64 config)
{
return config & 0xff;
}
#define XSCALE1_OVERFLOWED_MASK 0x700
#define XSCALE1_CCOUNT_OVERFLOW 0x400
#define XSCALE1_COUNT0_OVERFLOW 0x100
#define XSCALE1_COUNT1_OVERFLOW 0x200
#define XSCALE1_CCOUNT_INT_EN 0x040
#define XSCALE1_COUNT0_INT_EN 0x010
#define XSCALE1_COUNT1_INT_EN 0x020
#define XSCALE1_COUNT0_EVT_SHFT 12
#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT 20
#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
static inline u32
xscale1pmu_read_pmnc(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
return val;
}
static inline void
xscale1pmu_write_pmnc(u32 val)
{
/* upper 4bits and 7, 11 are write-as-0 */
val &= 0xffff77f;
asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
}
static inline int
xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
enum xscale_counters counter)
{
int ret = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
break;
case XSCALE_COUNTER0:
ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
break;
case XSCALE_COUNTER1:
ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
return ret;
}
static irqreturn_t
xscale1pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/*
* NOTE: there's an A stepping erratum that states if an overflow
* bit already exists and another occurs, the previous
* Overflow bit gets cleared. There's no workaround.
* Fixed in B stepping or later.
*/
pmnc = xscale1pmu_read_pmnc();
/*
* Write the value back to clear the overflow flags. Overflow
* flags remain in pmnc for use below. We also disable the PMU
* while we process the interrupt.
*/
xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
return IRQ_NONE;
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
irq_work_run();
/*
* Re-enable the PMU.
*/
pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(pmnc);
return IRQ_HANDLED;
}
static void
xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long val, mask, evt, flags;
switch (idx) {
case XSCALE_CYCLE_COUNTER:
mask = 0;
evt = XSCALE1_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
mask = XSCALE1_COUNT0_EVT_MASK;
evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
XSCALE1_COUNT0_INT_EN;
break;
case XSCALE_COUNTER1:
mask = XSCALE1_COUNT1_EVT_MASK;
evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
XSCALE1_COUNT1_INT_EN;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long val, mask, evt, flags;
switch (idx) {
case XSCALE_CYCLE_COUNTER:
mask = XSCALE1_CCOUNT_INT_EN;
evt = 0;
break;
case XSCALE_COUNTER0:
mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
if (XSCALE_PERFCTR_CCNT == event->config_base) {
if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return XSCALE_CYCLE_COUNTER;
} else {
if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
return XSCALE_COUNTER1;
}
if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
return XSCALE_COUNTER0;
}
return -EAGAIN;
}
}
static void
xscale1pmu_start(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val |= XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale1pmu_stop(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale1pmu_read_counter(int counter)
{
u32 val = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
break;
}
return val;
}
static inline void
xscale1pmu_write_counter(int counter, u32 val)
{
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
break;
}
}
static const struct arm_pmu xscale1pmu = {
.id = ARM_PERF_PMU_ID_XSCALE1,
.handle_irq = xscale1pmu_handle_irq,
.enable = xscale1pmu_enable_event,
.disable = xscale1pmu_disable_event,
.event_map = xscalepmu_event_map,
.raw_event = xscalepmu_raw_event,
.read_counter = xscale1pmu_read_counter,
.write_counter = xscale1pmu_write_counter,
.get_event_idx = xscale1pmu_get_event_idx,
.start = xscale1pmu_start,
.stop = xscale1pmu_stop,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
#define XSCALE2_OVERFLOWED_MASK 0x01f
#define XSCALE2_CCOUNT_OVERFLOW 0x001
#define XSCALE2_COUNT0_OVERFLOW 0x002
#define XSCALE2_COUNT1_OVERFLOW 0x004
#define XSCALE2_COUNT2_OVERFLOW 0x008
#define XSCALE2_COUNT3_OVERFLOW 0x010
#define XSCALE2_CCOUNT_INT_EN 0x001
#define XSCALE2_COUNT0_INT_EN 0x002
#define XSCALE2_COUNT1_INT_EN 0x004
#define XSCALE2_COUNT2_INT_EN 0x008
#define XSCALE2_COUNT3_INT_EN 0x010
#define XSCALE2_COUNT0_EVT_SHFT 0
#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT 8
#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT 16
#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT 24
#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
static inline u32
xscale2pmu_read_pmnc(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
/* bits 1-2 and 4-23 are read-unpredictable */
return val & 0xff000009;
}
static inline void
xscale2pmu_write_pmnc(u32 val)
{
/* bits 4-23 are write-as-0, 24-31 are write ignored */
val &= 0xf;
asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
}
static inline u32
xscale2pmu_read_overflow_flags(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
return val;
}
static inline void
xscale2pmu_write_overflow_flags(u32 val)
{
asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
}
static inline u32
xscale2pmu_read_event_select(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
return val;
}
static inline void
xscale2pmu_write_event_select(u32 val)
{
asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
}
static inline u32
xscale2pmu_read_int_enable(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
return val;
}
static void
xscale2pmu_write_int_enable(u32 val)
{
asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
}
static inline int
xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
enum xscale_counters counter)
{
int ret = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
break;
case XSCALE_COUNTER0:
ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
break;
case XSCALE_COUNTER1:
ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
break;
case XSCALE_COUNTER2:
ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
break;
case XSCALE_COUNTER3:
ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
return ret;
}
static irqreturn_t
xscale2pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc, of_flags;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/* Disable the PMU. */
pmnc = xscale2pmu_read_pmnc();
xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
/* Check the overflow flag register. */
of_flags = xscale2pmu_read_overflow_flags();
if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
return IRQ_NONE;
/* Clear the overflow bits. */
xscale2pmu_write_overflow_flags(of_flags);
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
irq_work_run();
/*
* Re-enable the PMU.
*/
pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(pmnc);
return IRQ_HANDLED;
}
static void
xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags, ien, evtsel;
ien = xscale2pmu_read_int_enable();
evtsel = xscale2pmu_read_event_select();
switch (idx) {
case XSCALE_CYCLE_COUNTER:
ien |= XSCALE2_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
ien |= XSCALE2_COUNT0_INT_EN;
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
ien |= XSCALE2_COUNT1_INT_EN;
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
break;
case XSCALE_COUNTER2:
ien |= XSCALE2_COUNT2_INT_EN;
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
break;
case XSCALE_COUNTER3:
ien |= XSCALE2_COUNT3_INT_EN;
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags, ien, evtsel;
ien = xscale2pmu_read_int_enable();
evtsel = xscale2pmu_read_event_select();
switch (idx) {
case XSCALE_CYCLE_COUNTER:
ien &= ~XSCALE2_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
ien &= ~XSCALE2_COUNT0_INT_EN;
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
ien &= ~XSCALE2_COUNT1_INT_EN;
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
break;
case XSCALE_COUNTER2:
ien &= ~XSCALE2_COUNT2_INT_EN;
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
break;
case XSCALE_COUNTER3:
ien &= ~XSCALE2_COUNT3_INT_EN;
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
int idx = xscale1pmu_get_event_idx(cpuc, event);
if (idx >= 0)
goto out;
if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
idx = XSCALE_COUNTER3;
else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
idx = XSCALE_COUNTER2;
out:
return idx;
}
static void
xscale2pmu_start(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
val |= XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale2pmu_stop(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale2pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale2pmu_read_counter(int counter)
{
u32 val = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER2:
asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER3:
asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
break;
}
return val;
}
static inline void
xscale2pmu_write_counter(int counter, u32 val)
{
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER2:
asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER3:
asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
break;
}
}
static const struct arm_pmu xscale2pmu = {
.id = ARM_PERF_PMU_ID_XSCALE2,
.handle_irq = xscale2pmu_handle_irq,
.enable = xscale2pmu_enable_event,
.disable = xscale2pmu_disable_event,
.event_map = xscalepmu_event_map,
.raw_event = xscalepmu_raw_event,
.read_counter = xscale2pmu_read_counter,
.write_counter = xscale2pmu_write_counter,
.get_event_idx = xscale2pmu_get_event_idx,
.start = xscale2pmu_start,
.stop = xscale2pmu_stop,
.num_events = 5,
.max_period = (1LLU << 32) - 1,
};
/* Include the PMU-specific implementations. */
#include "perf_event_xscale.c"
#include "perf_event_v6.c"
#include "perf_event_v7.c"
static int __init
init_hw_perf_events(void)
......@@ -2977,37 +622,16 @@ init_hw_perf_events(void)
case 0xB360: /* ARM1136 */
case 0xB560: /* ARM1156 */
case 0xB760: /* ARM1176 */
armpmu = &armv6pmu;
memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
sizeof(armv6_perf_cache_map));
armpmu = armv6pmu_init();
break;
case 0xB020: /* ARM11mpcore */
armpmu = &armv6mpcore_pmu;
memcpy(armpmu_perf_cache_map,
armv6mpcore_perf_cache_map,
sizeof(armv6mpcore_perf_cache_map));
armpmu = armv6mpcore_pmu_init();
break;
case 0xC080: /* Cortex-A8 */
armv7pmu.id = ARM_PERF_PMU_ID_CA8;
memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
sizeof(armv7_a8_perf_cache_map));
armv7pmu.event_map = armv7_a8_pmu_event_map;
armpmu = &armv7pmu;
/* Reset PMNC and read the nb of CNTx counters
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
armpmu = armv7_a8_pmu_init();
break;
case 0xC090: /* Cortex-A9 */
armv7pmu.id = ARM_PERF_PMU_ID_CA9;
memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
sizeof(armv7_a9_perf_cache_map));
armv7pmu.event_map = armv7_a9_pmu_event_map;
armpmu = &armv7pmu;
/* Reset PMNC and read the nb of CNTx counters
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
armpmu = armv7_a9_pmu_init();
break;
}
/* Intel CPUs [xscale]. */
......@@ -3015,21 +639,17 @@ init_hw_perf_events(void)
part_number = (cpuid >> 13) & 0x7;
switch (part_number) {
case 1:
armpmu = &xscale1pmu;
memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
sizeof(xscale_perf_cache_map));
armpmu = xscale1pmu_init();
break;
case 2:
armpmu = &xscale2pmu;
memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
sizeof(xscale_perf_cache_map));
armpmu = xscale2pmu_init();
break;
}
}
if (armpmu) {
pr_info("enabled with %s PMU driver, %d counters available\n",
arm_pmu_names[armpmu->id], armpmu->num_events);
armpmu->name, armpmu->num_events);
} else {
pr_info("no hardware support available\n");
}
......
/*
* ARMv6 Performance counter handling code.
*
* Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
*
* ARMv6 has 2 configurable performance counters and a single cycle counter.
* They all share a single reset bit but can be written to zero so we can use
* that for a reset.
*
* The counters can't be individually enabled or disabled so when we remove
* one event and replace it with another we could get spurious counts from the
* wrong event. However, we can take advantage of the fact that the
* performance counters can export events to the event bus, and the event bus
* itself can be monitored. This requires that we *don't* export the events to
* the event bus. The procedure for disabling a configurable counter is:
* - change the counter to count the ETMEXTOUT[0] signal (0x20). This
* effectively stops the counter from counting.
* - disable the counter's interrupt generation (each counter has it's
* own interrupt enable bit).
* Once stopped, the counter value can be written as 0 to reset.
*
* To enable a counter:
* - enable the counter's interrupt generation.
* - set the new event type.
*
* Note: the dedicated cycle counter only counts cycles and can't be
* enabled/disabled independently of the others. When we want to disable the
* cycle counter, we have to just disable the interrupt reporting and start
* ignoring that counter. When re-enabling, we have to reset the value and
* enable the interrupt.
*/
#ifdef CONFIG_CPU_V6
enum armv6_perf_types {
ARMV6_PERFCTR_ICACHE_MISS = 0x0,
ARMV6_PERFCTR_IBUF_STALL = 0x1,
ARMV6_PERFCTR_DDEP_STALL = 0x2,
ARMV6_PERFCTR_ITLB_MISS = 0x3,
ARMV6_PERFCTR_DTLB_MISS = 0x4,
ARMV6_PERFCTR_BR_EXEC = 0x5,
ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
ARMV6_PERFCTR_INSTR_EXEC = 0x7,
ARMV6_PERFCTR_DCACHE_HIT = 0x9,
ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
ARMV6_PERFCTR_DCACHE_MISS = 0xB,
ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
ARMV6_PERFCTR_NOP = 0x20,
};
enum armv6_counters {
ARMV6_CYCLE_COUNTER = 1,
ARMV6_COUNTER0,
ARMV6_COUNTER1,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
enum armv6mpcore_perf_types {
ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
static inline unsigned long
armv6_pmcr_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
return val;
}
static inline void
armv6_pmcr_write(unsigned long val)
{
asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
}
#define ARMV6_PMCR_ENABLE (1 << 0)
#define ARMV6_PMCR_CTR01_RESET (1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
#define ARMV6_PMCR_OVERFLOWED_MASK \
(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
ARMV6_PMCR_CCOUNT_OVERFLOW)
static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)
{
return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
}
static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
enum armv6_counters counter)
{
int ret = 0;
if (ARMV6_CYCLE_COUNTER == counter)
ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
else if (ARMV6_COUNTER0 == counter)
ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
else if (ARMV6_COUNTER1 == counter)
ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
return ret;
}
static inline u32
armv6pmu_read_counter(int counter)
{
unsigned long value = 0;
if (ARMV6_CYCLE_COUNTER == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
else if (ARMV6_COUNTER0 == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
else if (ARMV6_COUNTER1 == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
return value;
}
static inline void
armv6pmu_write_counter(int counter,
u32 value)
{
if (ARMV6_CYCLE_COUNTER == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
else if (ARMV6_COUNTER0 == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
else if (ARMV6_COUNTER1 == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
void
armv6pmu_enable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, evt, flags;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = 0;
evt = ARMV6_PMCR_CCOUNT_IEN;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_EVT_COUNT0_MASK;
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
ARMV6_PMCR_COUNT0_IEN;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_EVT_COUNT1_MASK;
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
ARMV6_PMCR_COUNT1_IEN;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Mask out the current event and set the counter to count the event
* that we're interested in.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static irqreturn_t
armv6pmu_handle_irq(int irq_num,
void *dev)
{
unsigned long pmcr = armv6_pmcr_read();
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
if (!armv6_pmcr_has_overflowed(pmcr))
return IRQ_NONE;
regs = get_irq_regs();
/*
* The interrupts are cleared by writing the overflow flags back to
* the control register. All of the other bits don't have any effect
* if they are rewritten, so write the whole value back.
*/
armv6_pmcr_write(pmcr);
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
/*
* We have a single interrupt for all counters. Check that
* each counter has overflowed before we process it.
*/
if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts disabled. For
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
irq_work_run();
return IRQ_HANDLED;
}
static void
armv6pmu_start(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val |= ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
armv6pmu_stop(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
/* Always place a cycle counter into the cycle counter. */
if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return ARMV6_CYCLE_COUNTER;
} else {
/*
* For anything other than a cycle counter, try and use
* counter0 and counter1.
*/
if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
return ARMV6_COUNTER1;
if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
return ARMV6_COUNTER0;
/* The counters are all in use. */
return -EAGAIN;
}
}
static void
armv6pmu_disable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, evt, flags;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = ARMV6_PMCR_CCOUNT_IEN;
evt = 0;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Mask out the current event and set the counter to count the number
* of ETM bus signal assertion cycles. The external reporting should
* be disabled and so this should never increment.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, flags, evt = 0;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = ARMV6_PMCR_CCOUNT_IEN;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_COUNT0_IEN;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_COUNT1_IEN;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Unlike UP ARMv6, we don't have a way of stopping the counters. We
* simply disable the interrupt reporting.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static const struct arm_pmu armv6pmu = {
.id = ARM_PERF_PMU_ID_V6,
.name = "v6",
.handle_irq = armv6pmu_handle_irq,
.enable = armv6pmu_enable_event,
.disable = armv6pmu_disable_event,
.read_counter = armv6pmu_read_counter,
.write_counter = armv6pmu_write_counter,
.get_event_idx = armv6pmu_get_event_idx,
.start = armv6pmu_start,
.stop = armv6pmu_stop,
.cache_map = &armv6_perf_cache_map,
.event_map = &armv6_perf_map,
.raw_event_mask = 0xFF,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
const struct arm_pmu *__init armv6pmu_init(void)
{
return &armv6pmu;
}
/*
* ARMv6mpcore is almost identical to single core ARMv6 with the exception
* that some of the events have different enumerations and that there is no
* *hack* to stop the programmable counters. To stop the counters we simply
* disable the interrupt reporting and update the event. When unthrottling we
* reset the period and enable the interrupt reporting.
*/
static const struct arm_pmu armv6mpcore_pmu = {
.id = ARM_PERF_PMU_ID_V6MP,
.name = "v6mpcore",
.handle_irq = armv6pmu_handle_irq,
.enable = armv6pmu_enable_event,
.disable = armv6mpcore_pmu_disable_event,
.read_counter = armv6pmu_read_counter,
.write_counter = armv6pmu_write_counter,
.get_event_idx = armv6pmu_get_event_idx,
.start = armv6pmu_start,
.stop = armv6pmu_stop,
.cache_map = &armv6mpcore_perf_cache_map,
.event_map = &armv6mpcore_perf_map,
.raw_event_mask = 0xFF,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
const struct arm_pmu *__init armv6mpcore_pmu_init(void)
{
return &armv6mpcore_pmu;
}
#else
const struct arm_pmu *__init armv6pmu_init(void)
{
return NULL;
}
const struct arm_pmu *__init armv6mpcore_pmu_init(void)
{
return NULL;
}
#endif /* CONFIG_CPU_V6 */
/*
* ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
*
* ARMv7 support: Jean Pihet <jpihet@mvista.com>
* 2010 (c) MontaVista Software, LLC.
*
* Copied from ARMv6 code, with the low level code inspired
* by the ARMv7 Oprofile code.
*
* Cortex-A8 has up to 4 configurable performance counters and
* a single cycle counter.
* Cortex-A9 has up to 31 configurable performance counters and
* a single cycle counter.
*
* All counters can be enabled/disabled and IRQ masked separately. The cycle
* counter and all 4 performance counters together can be reset separately.
*/
#ifdef CONFIG_CPU_V7
/* Common ARMv7 event types */
enum armv7_perf_types {
ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
ARMV7_PERFCTR_IFETCH_MISS = 0x01,
ARMV7_PERFCTR_ITLB_MISS = 0x02,
ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
ARMV7_PERFCTR_DTLB_REFILL = 0x05,
ARMV7_PERFCTR_DREAD = 0x06,
ARMV7_PERFCTR_DWRITE = 0x07,
ARMV7_PERFCTR_EXC_TAKEN = 0x09,
ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
ARMV7_PERFCTR_CID_WRITE = 0x0B,
/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
* It counts:
* - all branch instructions,
* - instructions that explicitly write the PC,
* - exception generating instructions.
*/
ARMV7_PERFCTR_PC_WRITE = 0x0C,
ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
ARMV7_PERFCTR_CPU_CYCLES = 0xFF
};
/* ARMv7 Cortex-A8 specific event types */
enum armv7_a8_perf_types {
ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
ARMV7_PERFCTR_L2_ACCESS = 0x43,
ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
ARMV7_PERFCTR_L2_NEON = 0x4E,
ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
ARMV7_PERFCTR_L1_INST = 0x50,
ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
ARMV7_PERFCTR_OP_EXECUTED = 0x55,
ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
ARMV7_PERFCTR_CYCLES_INST = 0x57,
ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
ARMV7_PERFCTR_PMU_EVENTS = 0x72,
};
/* ARMv7 Cortex-A9 specific event types */
enum armv7_a9_perf_types {
ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
ARMV7_PERFCTR_DATA_EVICTION = 0x65,
ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
ARMV7_PERFCTR_ISB_INST = 0x90,
ARMV7_PERFCTR_DSB_INST = 0x91,
ARMV7_PERFCTR_DMB_INST = 0x92,
ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
};
/*
* Cortex-A8 HW events mapping
*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
/*
* Cortex-A9 HW events mapping
*/
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] =
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
/*
* Perf Events counters
*/
enum armv7_counters {
ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
ARMV7_COUNTER0 = 2, /* First event counter */
};
/*
* The cycle counter is ARMV7_CYCLE_COUNTER.
* The first event counter is ARMV7_COUNTER0.
* The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
*/
#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
/*
* ARMv7 low level PMNC access
*/
/*
* Per-CPU PMNC: config reg
*/
#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
#define ARMV7_PMNC_N_MASK 0x1f
#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
/*
* Available counters
*/
#define ARMV7_CNT0 0 /* First event counter */
#define ARMV7_CCNT 31 /* Cycle counter */
/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
/*
* CNTENS: counters enable reg
*/
#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
/*
* CNTENC: counters disable reg
*/
#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
/*
* INTENS: counters overflow interrupt enable reg
*/
#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
/*
* INTENC: counters overflow interrupt disable reg
*/
#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
/*
* EVTSEL: Event selection reg
*/
#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
/*
* SELECT: Counter selection reg
*/
#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
/*
* FLAG: counters overflow flag status reg
*/
#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
static inline unsigned long armv7_pmnc_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
return val;
}
static inline void armv7_pmnc_write(unsigned long val)
{
val &= ARMV7_PMNC_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
}
static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
{
return pmnc & ARMV7_OVERFLOWED_MASK;
}
static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
enum armv7_counters counter)
{
int ret = 0;
if (counter == ARMV7_CYCLE_COUNTER)
ret = pmnc & ARMV7_FLAG_C;
else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
ret = pmnc & ARMV7_FLAG_P(counter);
else
pr_err("CPU%u checking wrong counter %d overflow status\n",
smp_processor_id(), counter);
return ret;
}
static inline int armv7_pmnc_select_counter(unsigned int idx)
{
u32 val;
if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
pr_err("CPU%u selecting wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
return idx;
}
static inline u32 armv7pmu_read_counter(int idx)
{
unsigned long value = 0;
if (idx == ARMV7_CYCLE_COUNTER)
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
if (armv7_pmnc_select_counter(idx) == idx)
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r" (value));
} else
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
return value;
}
static inline void armv7pmu_write_counter(int idx, u32 value)
{
if (idx == ARMV7_CYCLE_COUNTER)
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
if (armv7_pmnc_select_counter(idx) == idx)
asm volatile("mcr p15, 0, %0, c9, c13, 2"
: : "r" (value));
} else
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
}
static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
{
if (armv7_pmnc_select_counter(idx) == idx) {
val &= ARMV7_EVTSEL_MASK;
asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
}
}
static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u enabling wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_CNTENS_C;
else
val = ARMV7_CNTENS_P(idx);
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u disabling wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_CNTENC_C;
else
val = ARMV7_CNTENC_P(idx);
asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u enabling wrong PMNC counter"
" interrupt enable %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_INTENS_C;
else
val = ARMV7_INTENS_P(idx);
asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u disabling wrong PMNC counter"
" interrupt enable %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_INTENC_C;
else
val = ARMV7_INTENC_P(idx);
asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_getreset_flags(void)
{
u32 val;
/* Read */
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
/* Write to clear flags */
val &= ARMV7_FLAG_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
return val;
}
#ifdef DEBUG
static void armv7_pmnc_dump_regs(void)
{
u32 val;
unsigned int cnt;
printk(KERN_INFO "PMNC registers dump:\n");
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
printk(KERN_INFO "PMNC =0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
printk(KERN_INFO "CNTENS=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
printk(KERN_INFO "INTENS=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
printk(KERN_INFO "FLAGS =0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
printk(KERN_INFO "SELECT=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
printk(KERN_INFO "CCNT =0x%08x\n", val);
for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
armv7_pmnc_select_counter(cnt);
asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
printk(KERN_INFO "CNT[%d] count =0x%08x\n",
cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
}
}
#endif
void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags;
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
spin_lock_irqsave(&pmu_lock, flags);
/*
* Disable counter
*/
armv7_pmnc_disable_counter(idx);
/*
* Set event (if destined for PMNx counters)
* We don't need to set the event if it's a cycle count
*/
if (idx != ARMV7_CYCLE_COUNTER)
armv7_pmnc_write_evtsel(idx, hwc->config_base);
/*
* Enable interrupt for this counter
*/
armv7_pmnc_enable_intens(idx);
/*
* Enable counter
*/
armv7_pmnc_enable_counter(idx);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags;
/*
* Disable counter and interrupt
*/
spin_lock_irqsave(&pmu_lock, flags);
/*
* Disable counter
*/
armv7_pmnc_disable_counter(idx);
/*
* Disable interrupt for this counter
*/
armv7_pmnc_disable_intens(idx);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/*
* Get and reset the IRQ flags
*/
pmnc = armv7_pmnc_getreset_flags();
/*
* Did an overflow occur?
*/
if (!armv7_pmnc_has_overflowed(pmnc))
return IRQ_NONE;
/*
* Handle the counter(s) overflow(s)
*/
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
/*
* We have a single interrupt for all counters. Check that
* each counter has overflowed before we process it.
*/
if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts disabled. For
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
irq_work_run();
return IRQ_HANDLED;
}
static void armv7pmu_start(void)
{
unsigned long flags;
spin_lock_irqsave(&pmu_lock, flags);
/* Enable all counters */
armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void armv7pmu_stop(void)
{
unsigned long flags;
spin_lock_irqsave(&pmu_lock, flags);
/* Disable all counters */
armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
int idx;
/* Always place a cycle counter into the cycle counter. */
if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return ARMV7_CYCLE_COUNTER;
} else {
/*
* For anything other than a cycle counter, try and use
* the events counters
*/
for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
if (!test_and_set_bit(idx, cpuc->used_mask))
return idx;
}
/* The counters are all in use. */
return -EAGAIN;
}
}
static struct arm_pmu armv7pmu = {
.handle_irq = armv7pmu_handle_irq,
.enable = armv7pmu_enable_event,
.disable = armv7pmu_disable_event,
.read_counter = armv7pmu_read_counter,
.write_counter = armv7pmu_write_counter,
.get_event_idx = armv7pmu_get_event_idx,
.start = armv7pmu_start,
.stop = armv7pmu_stop,
.raw_event_mask = 0xFF,
.max_period = (1LLU << 32) - 1,
};
static u32 __init armv7_reset_read_pmnc(void)
{
u32 nb_cnt;
/* Initialize & Reset PMNC: C and P bits */
armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
/* Read the nb of CNTx counters supported from PMNC */
nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
/* Add the CPU cycles counter and return */
return nb_cnt + 1;
}
const struct arm_pmu *__init armv7_a8_pmu_init(void)
{
armv7pmu.id = ARM_PERF_PMU_ID_CA8;
armv7pmu.name = "ARMv7 Cortex-A8";
armv7pmu.cache_map = &armv7_a8_perf_cache_map;
armv7pmu.event_map = &armv7_a8_perf_map;
armv7pmu.num_events = armv7_reset_read_pmnc();
return &armv7pmu;
}
const struct arm_pmu *__init armv7_a9_pmu_init(void)
{
armv7pmu.id = ARM_PERF_PMU_ID_CA9;
armv7pmu.name = "ARMv7 Cortex-A9";
armv7pmu.cache_map = &armv7_a9_perf_cache_map;
armv7pmu.event_map = &armv7_a9_perf_map;
armv7pmu.num_events = armv7_reset_read_pmnc();
return &armv7pmu;
}
#else
const struct arm_pmu *__init armv7_a8_pmu_init(void)
{
return NULL;
}
const struct arm_pmu *__init armv7_a9_pmu_init(void)
{
return NULL;
}
#endif /* CONFIG_CPU_V7 */
/*
* ARMv5 [xscale] Performance counter handling code.
*
* Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
*
* Based on the previous xscale OProfile code.
*
* There are two variants of the xscale PMU that we support:
* - xscale1pmu: 2 event counters and a cycle counter
* - xscale2pmu: 4 event counters and a cycle counter
* The two variants share event definitions, but have different
* PMU structures.
*/
#ifdef CONFIG_CPU_XSCALE
enum xscale_perf_types {
XSCALE_PERFCTR_ICACHE_MISS = 0x00,
XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
XSCALE_PERFCTR_DATA_STALL = 0x02,
XSCALE_PERFCTR_ITLB_MISS = 0x03,
XSCALE_PERFCTR_DTLB_MISS = 0x04,
XSCALE_PERFCTR_BRANCH = 0x05,
XSCALE_PERFCTR_BRANCH_MISS = 0x06,
XSCALE_PERFCTR_INSTRUCTION = 0x07,
XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
XSCALE_PERFCTR_PC_CHANGED = 0x0D,
XSCALE_PERFCTR_BCU_REQUEST = 0x10,
XSCALE_PERFCTR_BCU_FULL = 0x11,
XSCALE_PERFCTR_BCU_DRAIN = 0x12,
XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
XSCALE_PERFCTR_RMW = 0x16,
/* XSCALE_PERFCTR_CCNT is not hardware defined */
XSCALE_PERFCTR_CCNT = 0xFE,
XSCALE_PERFCTR_UNUSED = 0xFF,
};
enum xscale_counters {
XSCALE_CYCLE_COUNTER = 1,
XSCALE_COUNTER0,
XSCALE_COUNTER1,
XSCALE_COUNTER2,
XSCALE_COUNTER3,
};
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
#define XSCALE_PMU_ENABLE 0x001
#define XSCALE_PMN_RESET 0x002
#define XSCALE_CCNT_RESET 0x004
#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
#define XSCALE_PMU_CNT64 0x008
#define XSCALE1_OVERFLOWED_MASK 0x700
#define XSCALE1_CCOUNT_OVERFLOW 0x400
#define XSCALE1_COUNT0_OVERFLOW 0x100
#define XSCALE1_COUNT1_OVERFLOW 0x200
#define XSCALE1_CCOUNT_INT_EN 0x040
#define XSCALE1_COUNT0_INT_EN 0x010
#define XSCALE1_COUNT1_INT_EN 0x020
#define XSCALE1_COUNT0_EVT_SHFT 12
#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT 20
#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
static inline u32
xscale1pmu_read_pmnc(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
return val;
}
static inline void
xscale1pmu_write_pmnc(u32 val)
{
/* upper 4bits and 7, 11 are write-as-0 */
val &= 0xffff77f;
asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
}
static inline int
xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
enum xscale_counters counter)
{
int ret = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
break;
case XSCALE_COUNTER0:
ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
break;
case XSCALE_COUNTER1:
ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
return ret;
}
static irqreturn_t
xscale1pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/*
* NOTE: there's an A stepping erratum that states if an overflow
* bit already exists and another occurs, the previous
* Overflow bit gets cleared. There's no workaround.
* Fixed in B stepping or later.
*/
pmnc = xscale1pmu_read_pmnc();
/*
* Write the value back to clear the overflow flags. Overflow
* flags remain in pmnc for use below. We also disable the PMU
* while we process the interrupt.
*/
xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
return IRQ_NONE;
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
irq_work_run();
/*
* Re-enable the PMU.
*/
pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(pmnc);
return IRQ_HANDLED;
}
static void
xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long val, mask, evt, flags;
switch (idx) {
case XSCALE_CYCLE_COUNTER:
mask = 0;
evt = XSCALE1_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
mask = XSCALE1_COUNT0_EVT_MASK;
evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
XSCALE1_COUNT0_INT_EN;
break;
case XSCALE_COUNTER1:
mask = XSCALE1_COUNT1_EVT_MASK;
evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
XSCALE1_COUNT1_INT_EN;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long val, mask, evt, flags;
switch (idx) {
case XSCALE_CYCLE_COUNTER:
mask = XSCALE1_CCOUNT_INT_EN;
evt = 0;
break;
case XSCALE_COUNTER0:
mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
if (XSCALE_PERFCTR_CCNT == event->config_base) {
if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return XSCALE_CYCLE_COUNTER;
} else {
if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
return XSCALE_COUNTER1;
if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
return XSCALE_COUNTER0;
return -EAGAIN;
}
}
static void
xscale1pmu_start(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val |= XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale1pmu_stop(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale1pmu_read_counter(int counter)
{
u32 val = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
break;
}
return val;
}
static inline void
xscale1pmu_write_counter(int counter, u32 val)
{
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
break;
}
}
static const struct arm_pmu xscale1pmu = {
.id = ARM_PERF_PMU_ID_XSCALE1,
.name = "xscale1",
.handle_irq = xscale1pmu_handle_irq,
.enable = xscale1pmu_enable_event,
.disable = xscale1pmu_disable_event,
.read_counter = xscale1pmu_read_counter,
.write_counter = xscale1pmu_write_counter,
.get_event_idx = xscale1pmu_get_event_idx,
.start = xscale1pmu_start,
.stop = xscale1pmu_stop,
.cache_map = &xscale_perf_cache_map,
.event_map = &xscale_perf_map,
.raw_event_mask = 0xFF,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
const struct arm_pmu *__init xscale1pmu_init(void)
{
return &xscale1pmu;
}
#define XSCALE2_OVERFLOWED_MASK 0x01f
#define XSCALE2_CCOUNT_OVERFLOW 0x001
#define XSCALE2_COUNT0_OVERFLOW 0x002
#define XSCALE2_COUNT1_OVERFLOW 0x004
#define XSCALE2_COUNT2_OVERFLOW 0x008
#define XSCALE2_COUNT3_OVERFLOW 0x010
#define XSCALE2_CCOUNT_INT_EN 0x001
#define XSCALE2_COUNT0_INT_EN 0x002
#define XSCALE2_COUNT1_INT_EN 0x004
#define XSCALE2_COUNT2_INT_EN 0x008
#define XSCALE2_COUNT3_INT_EN 0x010
#define XSCALE2_COUNT0_EVT_SHFT 0
#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT 8
#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT 16
#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT 24
#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
static inline u32
xscale2pmu_read_pmnc(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
/* bits 1-2 and 4-23 are read-unpredictable */
return val & 0xff000009;
}
static inline void
xscale2pmu_write_pmnc(u32 val)
{
/* bits 4-23 are write-as-0, 24-31 are write ignored */
val &= 0xf;
asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
}
static inline u32
xscale2pmu_read_overflow_flags(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
return val;
}
static inline void
xscale2pmu_write_overflow_flags(u32 val)
{
asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
}
static inline u32
xscale2pmu_read_event_select(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
return val;
}
static inline void
xscale2pmu_write_event_select(u32 val)
{
asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
}
static inline u32
xscale2pmu_read_int_enable(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
return val;
}
static void
xscale2pmu_write_int_enable(u32 val)
{
asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
}
static inline int
xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
enum xscale_counters counter)
{
int ret = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
break;
case XSCALE_COUNTER0:
ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
break;
case XSCALE_COUNTER1:
ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
break;
case XSCALE_COUNTER2:
ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
break;
case XSCALE_COUNTER3:
ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
return ret;
}
static irqreturn_t
xscale2pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc, of_flags;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/* Disable the PMU. */
pmnc = xscale2pmu_read_pmnc();
xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
/* Check the overflow flag register. */
of_flags = xscale2pmu_read_overflow_flags();
if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
return IRQ_NONE;
/* Clear the overflow bits. */
xscale2pmu_write_overflow_flags(of_flags);
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
irq_work_run();
/*
* Re-enable the PMU.
*/
pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(pmnc);
return IRQ_HANDLED;
}
static void
xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags, ien, evtsel;
ien = xscale2pmu_read_int_enable();
evtsel = xscale2pmu_read_event_select();
switch (idx) {
case XSCALE_CYCLE_COUNTER:
ien |= XSCALE2_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
ien |= XSCALE2_COUNT0_INT_EN;
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
ien |= XSCALE2_COUNT1_INT_EN;
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
break;
case XSCALE_COUNTER2:
ien |= XSCALE2_COUNT2_INT_EN;
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
break;
case XSCALE_COUNTER3:
ien |= XSCALE2_COUNT3_INT_EN;
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags, ien, evtsel;
ien = xscale2pmu_read_int_enable();
evtsel = xscale2pmu_read_event_select();
switch (idx) {
case XSCALE_CYCLE_COUNTER:
ien &= ~XSCALE2_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
ien &= ~XSCALE2_COUNT0_INT_EN;
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
ien &= ~XSCALE2_COUNT1_INT_EN;
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
break;
case XSCALE_COUNTER2:
ien &= ~XSCALE2_COUNT2_INT_EN;
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
break;
case XSCALE_COUNTER3:
ien &= ~XSCALE2_COUNT3_INT_EN;
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
spin_lock_irqsave(&pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
int idx = xscale1pmu_get_event_idx(cpuc, event);
if (idx >= 0)
goto out;
if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
idx = XSCALE_COUNTER3;
else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
idx = XSCALE_COUNTER2;
out:
return idx;
}
static void
xscale2pmu_start(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
val |= XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale2pmu_stop(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = xscale2pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale2pmu_read_counter(int counter)
{
u32 val = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER2:
asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER3:
asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
break;
}
return val;
}
static inline void
xscale2pmu_write_counter(int counter, u32 val)
{
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER2:
asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER3:
asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
break;
}
}
static const struct arm_pmu xscale2pmu = {
.id = ARM_PERF_PMU_ID_XSCALE2,
.name = "xscale2",
.handle_irq = xscale2pmu_handle_irq,
.enable = xscale2pmu_enable_event,
.disable = xscale2pmu_disable_event,
.read_counter = xscale2pmu_read_counter,
.write_counter = xscale2pmu_write_counter,
.get_event_idx = xscale2pmu_get_event_idx,
.start = xscale2pmu_start,
.stop = xscale2pmu_stop,
.cache_map = &xscale_perf_cache_map,
.event_map = &xscale_perf_map,
.raw_event_mask = 0xFF,
.num_events = 5,
.max_period = (1LLU << 32) - 1,
};
const struct arm_pmu *__init xscale2pmu_init(void)
{
return &xscale2pmu;
}
#else
const struct arm_pmu *__init xscale1pmu_init(void)
{
return NULL;
}
const struct arm_pmu *__init xscale2pmu_init(void)
{
return NULL;
}
#endif /* CONFIG_CPU_XSCALE */
/*
* sched_clock.c: support for extending counters to full 64-bit ns counter
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <asm/sched_clock.h>
static void sched_clock_poll(unsigned long wrap_ticks);
static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
static void (*sched_clock_update_fn)(void);
static void sched_clock_poll(unsigned long wrap_ticks)
{
mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
sched_clock_update_fn();
}
void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
unsigned int clock_bits, unsigned long rate)
{
unsigned long r, w;
u64 res, wrap;
char r_unit;
sched_clock_update_fn = update;
/* calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60);
r = rate;
if (r >= 4000000) {
r /= 1000000;
r_unit = 'M';
} else {
r /= 1000;
r_unit = 'k';
}
/* calculate how many ns until we wrap */
wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift);
do_div(wrap, NSEC_PER_MSEC);
w = wrap;
/* calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, cd->mult, cd->shift);
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
clock_bits, r, r_unit, res, w);
/*
* Start the timer to keep sched_clock() properly updated and
* sets the initial epoch.
*/
sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
sched_clock_poll(sched_clock_timer.data);
/*
* Ensure that sched_clock() starts off at 0ns
*/
cd->epoch_ns = 0;
}
......@@ -16,6 +16,7 @@
#include <linux/cache.h>
#include <linux/profile.h>
#include <linux/errno.h>
#include <linux/ftrace.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/cpu.h>
......@@ -456,7 +457,7 @@ static void ipi_timer(void)
}
#ifdef CONFIG_LOCAL_TIMERS
asmlinkage void __exception do_local_timer(struct pt_regs *regs)
asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
int cpu = smp_processor_id();
......@@ -543,7 +544,7 @@ static void ipi_cpu_stop(unsigned int cpu)
*
* Bit 0 - Inter-processor function call
*/
asmlinkage void __exception do_IPI(struct pt_regs *regs)
asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs)
{
unsigned int cpu = smp_processor_id();
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
......
......@@ -101,6 +101,7 @@ SECTIONS
__exception_text_start = .;
*(.exception.text)
__exception_text_end = .;
IRQENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
......
......@@ -101,7 +101,6 @@ static struct clocksource clk32k = {
.rating = 150,
.read = read_clk32k,
.mask = CLOCKSOURCE_MASK(20),
.shift = 10,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -201,8 +200,7 @@ void __init at91rm9200_timer_init(void)
clockevents_register_device(&clkevt);
/* register clocksource */
clk32k.mult = clocksource_hz2mult(AT91_SLOW_CLOCK, clk32k.shift);
clocksource_register(&clk32k);
clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK);
}
struct sys_timer at91rm9200_timer = {
......
......@@ -51,7 +51,6 @@ static struct clocksource pit_clk = {
.name = "pit",
.rating = 175,
.read = read_pit_clk,
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -163,10 +162,9 @@ static void __init at91sam926x_pit_init(void)
* Register clocksource. The high order bits of PIV are unused,
* so this isn't a 32-bit counter unless we get clockevent irqs.
*/
pit_clk.mult = clocksource_hz2mult(pit_rate, pit_clk.shift);
bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */;
pit_clk.mask = CLOCKSOURCE_MASK(bits);
clocksource_register(&pit_clk);
clocksource_register_hz(&pit_clk, pit_rate);
/* Set up irq handler */
setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
......
......@@ -294,7 +294,6 @@ static struct clocksource clocksource_bcmring_timer1 = {
.rating = 200,
.read = bcmring_get_cycles_timer1,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -303,7 +302,6 @@ static struct clocksource clocksource_bcmring_timer3 = {
.rating = 100,
.read = bcmring_get_cycles_timer3,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -316,10 +314,8 @@ static int __init bcmring_clocksource_init(void)
writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
TIMER1_VA_BASE + TIMER_CTRL);
clocksource_bcmring_timer1.mult =
clocksource_khz2mult(TIMER1_FREQUENCY_MHZ * 1000,
clocksource_bcmring_timer1.shift);
clocksource_register(&clocksource_bcmring_timer1);
clocksource_register_khz(&clocksource_bcmring_timer1,
TIMER1_FREQUENCY_MHZ * 1000);
/* setup timer3 as free-running clocksource */
writel(0, TIMER3_VA_BASE + TIMER_CTRL);
......@@ -328,10 +324,8 @@ static int __init bcmring_clocksource_init(void)
writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
TIMER3_VA_BASE + TIMER_CTRL);
clocksource_bcmring_timer3.mult =
clocksource_khz2mult(TIMER3_FREQUENCY_KHZ,
clocksource_bcmring_timer3.shift);
clocksource_register(&clocksource_bcmring_timer3);
clocksource_register_khz(&clocksource_bcmring_timer3,
TIMER3_FREQUENCY_KHZ);
return 0;
}
......
......@@ -276,7 +276,6 @@ static struct clocksource clocksource_davinci = {
.rating = 300,
.read = read_cycles,
.mask = CLOCKSOURCE_MASK(32),
.shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -378,10 +377,8 @@ static void __init davinci_timer_init(void)
/* setup clocksource */
clocksource_davinci.name = id_to_name[clocksource_id];
clocksource_davinci.mult =
clocksource_khz2mult(davinci_clock_tick_rate/1000,
clocksource_davinci.shift);
if (clocksource_register(&clocksource_davinci))
if (clocksource_register_hz(&clocksource_davinci,
davinci_clock_tick_rate))
printk(err, clocksource_davinci.name);
/* setup clockevent */
......
......@@ -372,7 +372,6 @@ static struct clocksource clocksource_timersp = {
.rating = 200,
.read = timersp_read,
.mask = CLOCKSOURCE_MASK(16),
.shift = 16,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -390,8 +389,7 @@ static void integrator_clocksource_init(u32 khz)
writel(ctrl, base + TIMER_CTRL);
writel(0xffff, base + TIMER_LOAD);
cs->mult = clocksource_khz2mult(khz, cs->shift);
clocksource_register(cs);
clocksource_register_khz(cs, khz);
}
static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE;
......
......@@ -35,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/irq.h>
#include <asm/sched_clock.h>
#include <asm/mach/map.h>
#include <asm/mach/irq.h>
......@@ -398,6 +399,23 @@ void __init ixp4xx_sys_init(void)
ixp4xx_exp_bus_size >> 20);
}
/*
* sched_clock()
*/
static DEFINE_CLOCK_DATA(cd);
unsigned long long notrace sched_clock(void)
{
u32 cyc = *IXP4XX_OSTS;
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}
static void notrace ixp4xx_update_sched_clock(void)
{
u32 cyc = *IXP4XX_OSTS;
update_sched_clock(&cd, cyc, (u32)~0);
}
/*
* clocksource
*/
......@@ -411,7 +429,6 @@ static struct clocksource clocksource_ixp4xx = {
.rating = 200,
.read = ixp4xx_get_cycles,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -419,21 +436,9 @@ unsigned long ixp4xx_timer_freq = FREQ;
EXPORT_SYMBOL(ixp4xx_timer_freq);
static void __init ixp4xx_clocksource_init(void)
{
clocksource_ixp4xx.mult =
clocksource_hz2mult(ixp4xx_timer_freq,
clocksource_ixp4xx.shift);
clocksource_register(&clocksource_ixp4xx);
}
/*
* sched_clock()
*/
unsigned long long sched_clock(void)
{
cycle_t cyc = ixp4xx_get_cycles(NULL);
struct clocksource *cs = &clocksource_ixp4xx;
init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq);
return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq);
}
/*
......
......@@ -38,7 +38,6 @@ static cycle_t lpc32xx_clksrc_read(struct clocksource *cs)
static struct clocksource lpc32xx_clksrc = {
.name = "lpc32xx_clksrc",
.shift = 24,
.rating = 300,
.read = lpc32xx_clksrc_read,
.mask = CLOCKSOURCE_MASK(32),
......@@ -171,9 +170,7 @@ static void __init lpc32xx_timer_init(void)
__raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE));
__raw_writel(LCP32XX_TIMER_CNTR_TCR_EN,
LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE));
lpc32xx_clksrc.mult = clocksource_hz2mult(clkrate,
lpc32xx_clksrc.shift);
clocksource_register(&lpc32xx_clksrc);
clocksource_register_hz(&lpc32xx_clksrc, clkrate);
}
struct sys_timer lpc32xx_timer = {
......
......@@ -26,8 +26,8 @@
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/sched.h>
#include <linux/cnt32_to_63.h>
#include <asm/sched_clock.h>
#include <mach/addr-map.h>
#include <mach/regs-timers.h>
#include <mach/regs-apbc.h>
......@@ -42,23 +42,7 @@
#define MAX_DELTA (0xfffffffe)
#define MIN_DELTA (16)
#define TCR2NS_SCALE_FACTOR 10
static unsigned long tcr2ns_scale;
static void __init set_tcr2ns_scale(unsigned long tcr_rate)
{
unsigned long long v = 1000000000ULL << TCR2NS_SCALE_FACTOR;
do_div(v, tcr_rate);
tcr2ns_scale = v;
/*
* We want an even value to automatically clear the top bit
* returned by cnt32_to_63() without an additional run time
* instruction. So if the LSB is 1 then round it up.
*/
if (tcr2ns_scale & 1)
tcr2ns_scale++;
}
static DEFINE_CLOCK_DATA(cd);
/*
* FIXME: the timer needs some delay to stablize the counter capture
......@@ -75,10 +59,16 @@ static inline uint32_t timer_read(void)
return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0));
}
unsigned long long sched_clock(void)
unsigned long long notrace sched_clock(void)
{
unsigned long long v = cnt32_to_63(timer_read());
return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR;
u32 cyc = timer_read();
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}
static void notrace mmp_update_sched_clock(void)
{
u32 cyc = timer_read();
update_sched_clock(&cd, cyc, (u32)~0);
}
static irqreturn_t timer_interrupt(int irq, void *dev_id)
......@@ -146,7 +136,6 @@ static cycle_t clksrc_read(struct clocksource *cs)
static struct clocksource cksrc = {
.name = "clocksource",
.shift = 20,
.rating = 200,
.read = clksrc_read,
.mask = CLOCKSOURCE_MASK(32),
......@@ -186,17 +175,15 @@ void __init timer_init(int irq)
{
timer_config();
set_tcr2ns_scale(CLOCK_TICK_RATE);
init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE);
ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift);
ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt);
ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt);
ckevt.cpumask = cpumask_of(0);
cksrc.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc.shift);
setup_irq(irq, &timer_irq);
clocksource_register(&cksrc);
clocksource_register_hz(&cksrc, CLOCK_TICK_RATE);
clockevents_register_device(&ckevt);
}
......@@ -49,6 +49,8 @@ endchoice
config MSM_SOC_REV_A
bool
config ARCH_MSM_SCORPIONMP
bool
config ARCH_MSM_ARM11
bool
......
......@@ -137,7 +137,6 @@ static struct msm_clock msm_clocks[] = {
.rating = 200,
.read = msm_gpt_read,
.mask = CLOCKSOURCE_MASK(32),
.shift = 17,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
},
.irq = {
......@@ -164,7 +163,6 @@ static struct msm_clock msm_clocks[] = {
.rating = 300,
.read = msm_dgt_read,
.mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)),
.shift = 24 - MSM_DGT_SHIFT,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
},
.irq = {
......@@ -205,8 +203,7 @@ static void __init msm_timer_init(void)
ce->min_delta_ns = clockevent_delta2ns(4, ce);
ce->cpumask = cpumask_of(0);
cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
res = clocksource_register(cs);
res = clocksource_register_hz(cs, clock->freq);
if (res)
printk(KERN_ERR "msm_timer_init: clocksource_register "
"failed for %s\n", cs->name);
......
......@@ -114,7 +114,6 @@ static struct clocksource clocksource_netx = {
.rating = 200,
.read = netx_get_cycles,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -151,9 +150,7 @@ static void __init netx_timer_init(void)
writel(NETX_GPIO_COUNTER_CTRL_RUN,
NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE));
clocksource_netx.mult =
clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_netx.shift);
clocksource_register(&clocksource_netx);
clocksource_register_hz(&clocksource_netx, CLOCK_TICK_RATE);
netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
netx_clockevent.shift);
......
......@@ -35,7 +35,6 @@ static struct clocksource ns9360_clocksource = {
.rating = 300,
.read = ns9360_clocksource_read,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -148,10 +147,7 @@ static void __init ns9360_timer_init(void)
__raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE));
ns9360_clocksource.mult = clocksource_hz2mult(ns9360_cpuclock(),
ns9360_clocksource.shift);
clocksource_register(&ns9360_clocksource);
clocksource_register_hz(&ns9360_clocksource, ns9360_cpuclock());
latch = SH_DIV(ns9360_cpuclock(), HZ, 0);
......
......@@ -208,7 +208,6 @@ static struct clocksource clocksource_mpu = {
.rating = 300,
.read = mpu_read,
.mask = CLOCKSOURCE_MASK(32),
.shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -217,13 +216,10 @@ static void __init omap_init_clocksource(unsigned long rate)
static char err[] __initdata = KERN_ERR
"%s: can't register clocksource!\n";
clocksource_mpu.mult
= clocksource_khz2mult(rate/1000, clocksource_mpu.shift);
setup_irq(INT_TIMER2, &omap_mpu_timer2_irq);
omap_mpu_timer_start(1, ~0, 1);
if (clocksource_register(&clocksource_mpu))
if (clocksource_register_hz(&clocksource_mpu, rate))
printk(err, clocksource_mpu.name);
}
......
......@@ -195,7 +195,6 @@ static struct clocksource clocksource_gpt = {
.rating = 300,
.read = clocksource_read_cycles,
.mask = CLOCKSOURCE_MASK(32),
.shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -220,9 +219,7 @@ static void __init omap2_gp_clocksource_init(void)
omap_dm_timer_set_load_start(gpt, 1, 0);
clocksource_gpt.mult =
clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift);
if (clocksource_register(&clocksource_gpt))
if (clocksource_register_hz(&clocksource_gpt, tick_rate))
printk(err2, clocksource_gpt.name);
}
#endif
......
......@@ -17,11 +17,11 @@
#include <linux/interrupt.h>
#include <linux/clockchips.h>
#include <linux/sched.h>
#include <linux/cnt32_to_63.h>
#include <asm/div64.h>
#include <asm/mach/irq.h>
#include <asm/mach/time.h>
#include <asm/sched_clock.h>
#include <mach/regs-ost.h>
/*
......@@ -32,29 +32,18 @@
* long as there is always less than 582 seconds between successive
* calls to sched_clock() which should always be the case in practice.
*/
static DEFINE_CLOCK_DATA(cd);
#define OSCR2NS_SCALE_FACTOR 10
static unsigned long oscr2ns_scale;
static void __init set_oscr2ns_scale(unsigned long oscr_rate)
unsigned long long notrace sched_clock(void)
{
unsigned long long v = 1000000000ULL << OSCR2NS_SCALE_FACTOR;
do_div(v, oscr_rate);
oscr2ns_scale = v;
/*
* We want an even value to automatically clear the top bit
* returned by cnt32_to_63() without an additional run time
* instruction. So if the LSB is 1 then round it up.
*/
if (oscr2ns_scale & 1)
oscr2ns_scale++;
u32 cyc = OSCR;
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}
unsigned long long sched_clock(void)
static void notrace pxa_update_sched_clock(void)
{
unsigned long long v = cnt32_to_63(OSCR);
return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR;
u32 cyc = OSCR;
update_sched_clock(&cd, cyc, (u32)~0);
}
......@@ -127,7 +116,6 @@ static struct clocksource cksrc_pxa_oscr0 = {
.rating = 200,
.read = pxa_read_oscr,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -145,7 +133,7 @@ static void __init pxa_timer_init(void)
OIER = 0;
OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
set_oscr2ns_scale(clock_tick_rate);
init_sched_clock(&cd, pxa_update_sched_clock, 32, clock_tick_rate);
ckevt_pxa_osmr0.mult =
div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift);
......@@ -155,12 +143,9 @@ static void __init pxa_timer_init(void)
clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
ckevt_pxa_osmr0.cpumask = cpumask_of(0);
cksrc_pxa_oscr0.mult =
clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
setup_irq(IRQ_OST0, &pxa_ost0_irq);
clocksource_register(&cksrc_pxa_oscr0);
clocksource_register_hz(&cksrc_pxa_oscr0, clock_tick_rate);
clockevents_register_device(&ckevt_pxa_osmr0);
}
......
......@@ -52,6 +52,8 @@
#include <mach/irqs.h>
#include <asm/hardware/timer-sp.h>
#include <plat/sched_clock.h>
#include "core.h"
#ifdef CONFIG_ZONE_DMA
......@@ -654,6 +656,12 @@ void realview_leds_event(led_event_t ledevt)
}
#endif /* CONFIG_LEDS */
/*
* The sched_clock counter
*/
#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + \
REALVIEW_SYS_24MHz_OFFSET)
/*
* Where is the timer (VA)?
*/
......@@ -669,6 +677,8 @@ void __init realview_timer_init(unsigned int timer_irq)
{
u32 val;
versatile_sched_clock_init(REFCOUNTER, 24000000);
/*
* set clock frequency:
* REALVIEW_REFCLK is 32KHz
......
......@@ -211,7 +211,6 @@ struct clocksource pwm_clocksource = {
.rating = 250,
.read = s5pv310_pwm4_read,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS ,
};
......@@ -230,10 +229,7 @@ static void __init s5pv310_clocksource_init(void)
s5pv310_pwm_init(4, ~0);
s5pv310_pwm_start(4, 1);
pwm_clocksource.mult =
clocksource_khz2mult(clock_rate/1000, pwm_clocksource.shift);
if (clocksource_register(&pwm_clocksource))
if (clocksource_register_hz(&pwm_clocksource, clock_rate))
panic("%s: can't register clocksource\n", pwm_clocksource.name);
}
......
......@@ -16,9 +16,7 @@
#include <linux/pm.h>
#include <linux/cpufreq.h>
#include <linux/ioport.h>
#include <linux/sched.h> /* just for sched_clock() - funny that */
#include <linux/platform_device.h>
#include <linux/cnt32_to_63.h>
#include <asm/div64.h>
#include <mach/hardware.h>
......@@ -109,27 +107,6 @@ unsigned int sa11x0_getspeed(unsigned int cpu)
return cclk_frequency_100khz[PPCR & 0xf] * 100;
}
/*
* This is the SA11x0 sched_clock implementation. This has
* a resolution of 271ns, and a maximum value of 32025597s (370 days).
*
* The return value is guaranteed to be monotonic in that range as
* long as there is always less than 582 seconds between successive
* calls to this function.
*
* ( * 1E9 / 3686400 => * 78125 / 288)
*/
unsigned long long sched_clock(void)
{
unsigned long long v = cnt32_to_63(OSCR);
/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
v *= 78125<<1;
do_div(v, 288<<1);
return v;
}
/*
* Default power-off for SA1100
*/
......
......@@ -12,12 +12,39 @@
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/sched.h> /* just for sched_clock() - funny that */
#include <linux/timex.h>
#include <linux/clockchips.h>
#include <asm/mach/time.h>
#include <asm/sched_clock.h>
#include <mach/hardware.h>
/*
* This is the SA11x0 sched_clock implementation.
*/
static DEFINE_CLOCK_DATA(cd);
/*
* Constants generated by clocks_calc_mult_shift(m, s, 3.6864MHz,
* NSEC_PER_SEC, 60).
* This gives a resolution of about 271ns and a wrap period of about 19min.
*/
#define SC_MULT 2275555556u
#define SC_SHIFT 23
unsigned long long notrace sched_clock(void)
{
u32 cyc = OSCR;
return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
}
static void notrace sa1100_update_sched_clock(void)
{
u32 cyc = OSCR;
update_sched_clock(&cd, cyc, (u32)~0);
}
#define MIN_OSCR_DELTA 2
static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
......@@ -81,7 +108,6 @@ static struct clocksource cksrc_sa1100_oscr = {
.rating = 200,
.read = sa1100_read_oscr,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -97,6 +123,9 @@ static void __init sa1100_timer_init(void)
OIER = 0; /* disable any timer interrupts */
OSSR = 0xf; /* clear status on all timers */
init_fixed_sched_clock(&cd, sa1100_update_sched_clock, 32,
3686400, SC_MULT, SC_SHIFT);
ckevt_sa1100_osmr0.mult =
div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift);
ckevt_sa1100_osmr0.max_delta_ns =
......@@ -105,12 +134,9 @@ static void __init sa1100_timer_init(void)
clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
cksrc_sa1100_oscr.mult =
clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
setup_irq(IRQ_OST0, &sa1100_timer_irq);
clocksource_register(&cksrc_sa1100_oscr);
clocksource_register_hz(&cksrc_sa1100_oscr, CLOCK_TICK_RATE);
clockevents_register_device(&ckevt_sa1100_osmr0);
}
......
......@@ -35,7 +35,6 @@ static struct clocksource clocksource_tcc = {
.rating = 200,
.read = tcc_get_cycles,
.mask = CLOCKSOURCE_MASK(32),
.shift = 28,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -103,9 +102,7 @@ static int __init tcc_clockevent_init(struct clk *clock)
{
unsigned int c = clk_get_rate(clock);
clocksource_tcc.mult = clocksource_hz2mult(c,
clocksource_tcc.shift);
clocksource_register(&clocksource_tcc);
clocksource_register_hz(&clocksource_tcc, c);
clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC,
clockevent_tcc.shift);
......
......@@ -18,6 +18,7 @@
*/
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/time.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
......@@ -25,10 +26,10 @@
#include <linux/clocksource.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/cnt32_to_63.h>
#include <asm/mach/time.h>
#include <asm/localtimer.h>
#include <asm/sched_clock.h>
#include <mach/iomap.h>
#include <mach/irqs.h>
......@@ -91,7 +92,7 @@ static void tegra_timer_set_mode(enum clock_event_mode mode,
static cycle_t tegra_clocksource_read(struct clocksource *cs)
{
return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US));
return timer_readl(TIMERUS_CNTR_1US);
}
static struct clock_event_device tegra_clockevent = {
......@@ -106,14 +107,29 @@ static struct clocksource tegra_clocksource = {
.name = "timer_us",
.rating = 300,
.read = tegra_clocksource_read,
.mask = 0x7FFFFFFFFFFFFFFFULL,
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
unsigned long long sched_clock(void)
static DEFINE_CLOCK_DATA(cd);
/*
* Constants generated by clocks_calc_mult_shift(m, s, 1MHz, NSEC_PER_SEC, 60).
* This gives a resolution of about 1us and a wrap period of about 1h11min.
*/
#define SC_MULT 4194304000u
#define SC_SHIFT 22
unsigned long long notrace sched_clock(void)
{
return clocksource_cyc2ns(tegra_clocksource.read(&tegra_clocksource),
tegra_clocksource.mult, tegra_clocksource.shift);
u32 cyc = timer_readl(TIMERUS_CNTR_1US);
return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
}
static void notrace tegra_update_sched_clock(void)
{
u32 cyc = timer_readl(TIMERUS_CNTR_1US);
update_sched_clock(&cd, cyc, (u32)~0);
}
static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id)
......@@ -158,6 +174,9 @@ static void __init tegra_init_timer(void)
WARN(1, "Unknown clock rate");
}
init_fixed_sched_clock(&cd, tegra_update_sched_clock, 32,
1000000, SC_MULT, SC_SHIFT);
if (clocksource_register_hz(&tegra_clocksource, 1000000)) {
printk(KERN_ERR "Failed to register clocksource\n");
BUG();
......
......@@ -9,6 +9,7 @@
* Author: Linus Walleij <linus.walleij@stericsson.com>
*/
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/clockchips.h>
......@@ -21,6 +22,7 @@
#include <mach/hardware.h>
/* Generic stuff */
#include <asm/sched_clock.h>
#include <asm/mach/map.h>
#include <asm/mach/time.h>
#include <asm/mach/irq.h>
......@@ -352,12 +354,18 @@ static struct clocksource clocksource_u300_1mhz = {
* this wraps around for now, since it is just a relative time
* stamp. (Inspired by OMAP implementation.)
*/
static DEFINE_CLOCK_DATA(cd);
unsigned long long notrace sched_clock(void)
{
return clocksource_cyc2ns(clocksource_u300_1mhz.read(
&clocksource_u300_1mhz),
clocksource_u300_1mhz.mult,
clocksource_u300_1mhz.shift);
u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}
static void notrace u300_update_sched_clock(void)
{
u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
update_sched_clock(&cd, cyc, (u32)~0);
}
......@@ -375,6 +383,8 @@ static void __init u300_timer_init(void)
clk_enable(clk);
rate = clk_get_rate(clk);
init_sched_clock(&cd, u300_update_sched_clock, 32, rate);
/*
* Disable the "OS" and "DD" timers - these are designed for Symbian!
* Example usage in cnh1601578 cpu subsystem pd_timer_app.c
......@@ -412,9 +422,7 @@ static void __init u300_timer_init(void)
writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE,
U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2);
clocksource_calc_mult_shift(&clocksource_u300_1mhz,
rate, APPTIMER_MIN_RANGE);
if (clocksource_register(&clocksource_u300_1mhz))
if (clocksource_register_hz(&clocksource_u300_1mhz, rate))
printk(KERN_ERR "timer: failed to initialize clock "
"source %s\n", clocksource_u300_1mhz.name);
......
......@@ -51,6 +51,8 @@
#include <mach/platform.h>
#include <asm/hardware/timer-sp.h>
#include <plat/sched_clock.h>
#include "core.h"
/*
......@@ -885,6 +887,12 @@ void __init versatile_init(void)
#endif
}
/*
* The sched_clock counter
*/
#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + \
VERSATILE_SYS_24MHz_OFFSET)
/*
* Where is the timer (VA)?
*/
......@@ -900,6 +908,8 @@ static void __init versatile_timer_init(void)
{
u32 val;
versatile_sched_clock_init(REFCOUNTER, 24000000);
/*
* set clock frequency:
* VERSATILE_REFCLK is 32KHz
......
......@@ -18,11 +18,12 @@
#include <asm/mach/map.h>
#include <asm/mach/time.h>
#include <asm/hardware/arm_timer.h>
#include <asm/hardware/timer-sp.h>
#include <mach/clkdev.h>
#include <mach/motherboard.h>
#include <asm/hardware/timer-sp.h>
#include <plat/sched_clock.h>
#include "core.h"
......@@ -50,6 +51,8 @@ void __init v2m_map_io(struct map_desc *tile, size_t num)
static void __init v2m_timer_init(void)
{
versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000);
writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL);
writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL);
......
......@@ -153,7 +153,6 @@ static struct clocksource clocksource_nuc900 = {
.rating = 200,
.read = nuc900_get_cycles,
.mask = CLOCKSOURCE_MASK(TDR_SHIFT),
.shift = 10,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -176,9 +175,7 @@ static void __init nuc900_clocksource_init(void)
val |= (COUNTEN | PERIOD | PRESCALE);
__raw_writel(val, REG_TCSR1);
clocksource_nuc900.mult =
clocksource_khz2mult((rate / 1000), clocksource_nuc900.shift);
clocksource_register(&clocksource_nuc900);
clocksource_register_hz(&clocksource_nuc900, rate);
}
static void __init nuc900_timer_init(void)
......
......@@ -17,6 +17,7 @@
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/sched.h>
#include <linux/io.h>
......@@ -24,6 +25,7 @@
#include <linux/clockchips.h>
#include <mach/hardware.h>
#include <asm/irq.h>
#include <asm/sched_clock.h>
#include <asm/uaccess.h>
#include <asm/mach/irq.h>
#include <asm/mach/time.h>
......@@ -50,15 +52,21 @@ static struct clocksource iop_clocksource = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static DEFINE_CLOCK_DATA(cd);
/*
* IOP sched_clock() implementation via its clocksource.
*/
unsigned long long sched_clock(void)
unsigned long long notrace sched_clock(void)
{
cycle_t cyc = iop_clocksource_read(NULL);
struct clocksource *cs = &iop_clocksource;
u32 cyc = 0xffffffffu - read_tcr1();
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}
return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
static void notrace iop_update_sched_clock(void)
{
u32 cyc = 0xffffffffu - read_tcr1();
update_sched_clock(&cd, cyc, (u32)~0);
}
/*
......@@ -88,6 +96,7 @@ static void iop_set_mode(enum clock_event_mode mode,
case CLOCK_EVT_MODE_PERIODIC:
write_tmr0(tmr & ~IOP_TMR_EN);
write_tcr0(ticks_per_jiffy - 1);
write_trr0(ticks_per_jiffy - 1);
tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN);
break;
case CLOCK_EVT_MODE_ONESHOT:
......@@ -143,6 +152,8 @@ void __init iop_init_time(unsigned long tick_rate)
{
u32 timer_ctl;
init_sched_clock(&cd, iop_update_sched_clock, 32, tick_rate);
ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ);
iop_tick_rate = tick_rate;
......@@ -153,6 +164,7 @@ void __init iop_init_time(unsigned long tick_rate)
* Set up interrupting clockevent timer 0.
*/
write_tmr0(timer_ctl & ~IOP_TMR_EN);
write_tisr(1);
setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq);
clockevents_calc_mult_shift(&iop_clockevent,
tick_rate, IOP_MIN_RANGE);
......@@ -162,9 +174,6 @@ void __init iop_init_time(unsigned long tick_rate)
clockevent_delta2ns(0xf, &iop_clockevent);
iop_clockevent.cpumask = cpumask_of(0);
clockevents_register_device(&iop_clockevent);
write_trr0(ticks_per_jiffy - 1);
write_tcr0(ticks_per_jiffy - 1);
write_tmr0(timer_ctl);
/*
* Set up free-running clocksource timer 1.
......@@ -172,7 +181,5 @@ void __init iop_init_time(unsigned long tick_rate)
write_trr1(0xffffffff);
write_tcr1(0xffffffff);
write_tmr1(timer_ctl);
clocksource_calc_mult_shift(&iop_clocksource, tick_rate,
IOP_MIN_RANGE);
clocksource_register(&iop_clocksource);
clocksource_register_hz(&iop_clocksource, tick_rate);
}
......@@ -93,7 +93,6 @@ static struct clocksource clocksource_epit = {
.rating = 200,
.read = epit_read,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -101,9 +100,7 @@ static int __init epit_clocksource_init(struct clk *timer_clk)
{
unsigned int c = clk_get_rate(timer_clk);
clocksource_epit.mult = clocksource_hz2mult(c,
clocksource_epit.shift);
clocksource_register(&clocksource_epit);
clocksource_register_hz(&clocksource_epit, c);
return 0;
}
......
......@@ -120,7 +120,6 @@ static struct clocksource clocksource_mxc = {
.rating = 200,
.read = mx1_2_get_cycles,
.mask = CLOCKSOURCE_MASK(32),
.shift = 20,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -131,9 +130,7 @@ static int __init mxc_clocksource_init(struct clk *timer_clk)
if (timer_is_v2())
clocksource_mxc.read = v2_get_cycles;
clocksource_mxc.mult = clocksource_hz2mult(c,
clocksource_mxc.shift);
clocksource_register(&clocksource_mxc);
clocksource_register_hz(&clocksource_mxc, c);
return 0;
}
......
......@@ -14,6 +14,7 @@ if PLAT_NOMADIK
config HAS_MTU
bool
select HAVE_SCHED_CLOCK
help
Support for Multi Timer Unit. MTU provides access
to multiple interrupt generating programmable
......
......@@ -17,9 +17,9 @@
#include <linux/clk.h>
#include <linux/jiffies.h>
#include <linux/err.h>
#include <linux/cnt32_to_63.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <asm/mach/time.h>
#include <asm/sched_clock.h>
#include <plat/mtu.h>
......@@ -52,81 +52,24 @@ static struct clocksource nmdk_clksrc = {
* Override the global weak sched_clock symbol with this
* local implementation which uses the clocksource to get some
* better resolution when scheduling the kernel.
*
* Because the hardware timer period may be quite short
* (32.3 secs on the 133 MHz MTU timer selection on ux500)
* and because cnt32_to_63() needs to be called at least once per
* half period to work properly, a kernel keepwarm() timer is set up
* to ensure this requirement is always met.
*
* Also the sched_clock timer will wrap around at some point,
* here we set it to run continously for a year.
*/
#define SCHED_CLOCK_MIN_WRAP 3600*24*365
static struct timer_list cnt32_to_63_keepwarm_timer;
static u32 sched_mult;
static u32 sched_shift;
static DEFINE_CLOCK_DATA(cd);
unsigned long long notrace sched_clock(void)
{
u64 cycles;
u32 cyc;
if (unlikely(!mtu_base))
return 0;
cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0)));
/*
* sched_mult is guaranteed to be even so will
* shift out bit 63
*/
return (cycles * sched_mult) >> sched_shift;
cyc = -readl(mtu_base + MTU_VAL(0));
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}
/* Just kick sched_clock every so often */
static void cnt32_to_63_keepwarm(unsigned long data)
static void notrace nomadik_update_sched_clock(void)
{
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
(void) sched_clock();
}
/*
* Set up a timer to keep sched_clock():s 32_to_63 algorithm warm
* once in half a 32bit timer wrap interval.
*/
static void __init nmdk_sched_clock_init(unsigned long rate)
{
u32 v;
unsigned long delta;
u64 days;
/* Find the apropriate mult and shift factors */
clocks_calc_mult_shift(&sched_mult, &sched_shift,
rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP);
/* We need to multiply by an even number to get rid of bit 63 */
if (sched_mult & 1)
sched_mult++;
/* Let's see what we get, take max counter and scale it */
days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift;
do_div(days, NSEC_PER_SEC);
do_div(days, (3600*24));
pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n",
(64 - sched_shift), rate, (unsigned long) days);
/*
* Program a timer to kick us at half 32bit wraparound
* Formula: seconds per wrap = (2^32) / f
*/
v = 0xFFFFFFFFUL / rate;
/* We want half of the wrap time to keep cnt32_to_63 warm */
v /= 2;
pr_debug("sched_clock: prescaled timer rate: %lu Hz, "
"initialize keepwarm timer every %d seconds\n", rate, v);
/* Convert seconds to jiffies */
delta = msecs_to_jiffies(v*1000);
setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta);
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta));
u32 cyc = -readl(mtu_base + MTU_VAL(0));
update_sched_clock(&cd, cyc, (u32)~0);
}
/* Clockevent device: use one-shot mode */
......@@ -222,7 +165,6 @@ void __init nmdk_timer_init(void)
} else {
cr |= MTU_CRn_PRESCALE_1;
}
clocksource_calc_mult_shift(&nmdk_clksrc, rate, MTU_MIN_RANGE);
/* Timer 0 is the free running clocksource */
writel(cr, mtu_base + MTU_CR(0));
......@@ -233,11 +175,11 @@ void __init nmdk_timer_init(void)
/* Now the clock source is ready */
nmdk_clksrc.read = nmdk_read_timer;
if (clocksource_register(&nmdk_clksrc))
if (clocksource_register_hz(&nmdk_clksrc, rate))
pr_err("timer: failed to initialize clock source %s\n",
nmdk_clksrc.name);
nmdk_sched_clock_init(rate);
init_sched_clock(&cd, nomadik_update_sched_clock, 32, rate);
/* Timer 1 is used for events */
......
......@@ -15,8 +15,11 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/sched.h>
#include <asm/sched_clock.h>
#include <plat/common.h>
#include <plat/board.h>
......@@ -45,7 +48,7 @@
static u32 offset_32k __read_mostly;
#ifdef CONFIG_ARCH_OMAP16XX
static cycle_t omap16xx_32k_read(struct clocksource *cs)
static cycle_t notrace omap16xx_32k_read(struct clocksource *cs)
{
return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k;
}
......@@ -54,7 +57,7 @@ static cycle_t omap16xx_32k_read(struct clocksource *cs)
#endif
#ifdef CONFIG_ARCH_OMAP2420
static cycle_t omap2420_32k_read(struct clocksource *cs)
static cycle_t notrace omap2420_32k_read(struct clocksource *cs)
{
return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k;
}
......@@ -63,7 +66,7 @@ static cycle_t omap2420_32k_read(struct clocksource *cs)
#endif
#ifdef CONFIG_ARCH_OMAP2430
static cycle_t omap2430_32k_read(struct clocksource *cs)
static cycle_t notrace omap2430_32k_read(struct clocksource *cs)
{
return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k;
}
......@@ -72,7 +75,7 @@ static cycle_t omap2430_32k_read(struct clocksource *cs)
#endif
#ifdef CONFIG_ARCH_OMAP3
static cycle_t omap34xx_32k_read(struct clocksource *cs)
static cycle_t notrace omap34xx_32k_read(struct clocksource *cs)
{
return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k;
}
......@@ -81,7 +84,7 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs)
#endif
#ifdef CONFIG_ARCH_OMAP4
static cycle_t omap44xx_32k_read(struct clocksource *cs)
static cycle_t notrace omap44xx_32k_read(struct clocksource *cs)
{
return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k;
}
......@@ -93,7 +96,7 @@ static cycle_t omap44xx_32k_read(struct clocksource *cs)
* Kernel assumes that sched_clock can be called early but may not have
* things ready yet.
*/
static cycle_t omap_32k_read_dummy(struct clocksource *cs)
static cycle_t notrace omap_32k_read_dummy(struct clocksource *cs)
{
return 0;
}
......@@ -103,7 +106,6 @@ static struct clocksource clocksource_32k = {
.rating = 250,
.read = omap_32k_read_dummy,
.mask = CLOCKSOURCE_MASK(32),
.shift = 10,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -111,10 +113,25 @@ static struct clocksource clocksource_32k = {
* Returns current time from boot in nsecs. It's OK for this to wrap
* around for now, as it's just a relative time stamp.
*/
unsigned long long sched_clock(void)
static DEFINE_CLOCK_DATA(cd);
/*
* Constants generated by clocks_calc_mult_shift(m, s, 32768, NSEC_PER_SEC, 60).
* This gives a resolution of about 30us and a wrap period of about 36hrs.
*/
#define SC_MULT 4000000000u
#define SC_SHIFT 17
unsigned long long notrace sched_clock(void)
{
u32 cyc = clocksource_32k.read(&clocksource_32k);
return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
}
static void notrace omap_update_sched_clock(void)
{
return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k),
clocksource_32k.mult, clocksource_32k.shift);
u32 cyc = clocksource_32k.read(&clocksource_32k);
update_sched_clock(&cd, cyc, (u32)~0);
}
/**
......@@ -168,13 +185,13 @@ static int __init omap_init_clocksource_32k(void)
if (!IS_ERR(sync_32k_ick))
clk_enable(sync_32k_ick);
clocksource_32k.mult = clocksource_hz2mult(32768,
clocksource_32k.shift);
offset_32k = clocksource_32k.read(&clocksource_32k);
if (clocksource_register(&clocksource_32k))
if (clocksource_register_hz(&clocksource_32k, 32768))
printk(err, clocksource_32k.name);
init_fixed_sched_clock(&cd, omap_update_sched_clock, 32,
32768, SC_MULT, SC_SHIFT);
}
return 0;
}
......
......@@ -13,11 +13,11 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/cnt32_to_63.h>
#include <linux/timer.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/sched_clock.h>
#include <asm/mach/time.h>
#include <mach/bridge-regs.h>
#include <mach/hardware.h>
......@@ -44,52 +44,26 @@ static u32 ticks_per_jiffy;
/*
* Orion's sched_clock implementation. It has a resolution of
* at least 7.5ns (133MHz TCLK) and a maximum value of 834 days.
*
* Because the hardware timer period is quite short (21 secs if
* 200MHz TCLK) and because cnt32_to_63() needs to be called at
* least once per half period to work properly, a kernel timer is
* set up to ensure this requirement is always met.
* at least 7.5ns (133MHz TCLK).
*/
#define TCLK2NS_SCALE_FACTOR 8
static unsigned long tclk2ns_scale;
static DEFINE_CLOCK_DATA(cd);
unsigned long long sched_clock(void)
unsigned long long notrace sched_clock(void)
{
unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
u32 cyc = 0xffffffff - readl(TIMER0_VAL);
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}
static struct timer_list cnt32_to_63_keepwarm_timer;
static void cnt32_to_63_keepwarm(unsigned long data)
static void notrace orion_update_sched_clock(void)
{
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
(void) sched_clock();
u32 cyc = 0xffffffff - readl(TIMER0_VAL);
update_sched_clock(&cd, cyc, (u32)~0);
}
static void __init setup_sched_clock(unsigned long tclk)
{
unsigned long long v;
unsigned long data;
v = NSEC_PER_SEC;
v <<= TCLK2NS_SCALE_FACTOR;
v += tclk/2;
do_div(v, tclk);
/*
* We want an even value to automatically clear the top bit
* returned by cnt32_to_63() without an additional run time
* instruction. So if the LSB is 1 then round it up.
*/
if (v & 1)
v++;
tclk2ns_scale = v;
data = (0xffffffffUL / tclk / 2 - 2) * HZ;
setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data);
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
init_sched_clock(&cd, orion_update_sched_clock, 32, tclk);
}
/*
......@@ -102,7 +76,6 @@ static cycle_t orion_clksrc_read(struct clocksource *cs)
static struct clocksource orion_clksrc = {
.name = "orion_clocksource",
.shift = 20,
.rating = 300,
.read = orion_clksrc_read,
.mask = CLOCKSOURCE_MASK(32),
......@@ -245,8 +218,7 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK);
u = readl(TIMER_CTRL);
writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL);
orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift);
clocksource_register(&orion_clksrc);
clocksource_register_hz(&orion_clksrc, tclk);
/*
* Setup clockevent timer (interrupt-driven.)
......
......@@ -81,8 +81,6 @@ static struct clocksource clksrc = {
.rating = 200, /* its a pretty decent clock */
.read = clocksource_read_cycles,
.mask = 0xFFFF, /* 16 bits */
.mult = 0, /* to be computed */
.shift = 0, /* to be computed */
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -105,10 +103,8 @@ static void spear_clocksource_init(void)
val |= CTRL_ENABLE ;
writew(val, gpt_base + CR(CLKSRC));
clocksource_calc_mult_shift(&clksrc, tick_rate, SPEAR_MIN_RANGE);
/* register the clocksource */
clocksource_register(&clksrc);
clocksource_register_hz(&clksrc, tick_rate);
}
static struct clock_event_device clkevt = {
......
......@@ -89,7 +89,6 @@ static struct clocksource cksrc_stmp3xxx = {
.rating = 250,
.read = stmp3xxx_clock_read,
.mask = CLOCKSOURCE_MASK(16),
.shift = 10,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
......@@ -106,8 +105,6 @@ static struct irqaction stmp3xxx_timer_irq = {
*/
static void __init stmp3xxx_init_timer(void)
{
cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
cksrc_stmp3xxx.shift);
ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
ckevt_timrot.shift);
ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot);
......@@ -140,7 +137,7 @@ static void __init stmp3xxx_init_timer(void)
setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq);
clocksource_register(&cksrc_stmp3xxx);
clocksource_register_hz(&cksrc_stmp3xxx, CLOCK_TICK_RATE);
clockevents_register_device(&ckevt_timrot);
}
......
obj-y := clock.o
obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o
obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o
ifneq ($(CONFIG_ARCH_INTEGRATOR),y)
obj-y += sched-clock.o
endif
ifeq ($(CONFIG_LEDS_CLASS),y)
obj-$(CONFIG_ARCH_REALVIEW) += leds.o
obj-$(CONFIG_ARCH_VERSATILE) += leds.o
......
#ifndef ARM_PLAT_SCHED_CLOCK_H
#define ARM_PLAT_SCHED_CLOCK_H
void versatile_sched_clock_init(void __iomem *, unsigned long);
#endif
......@@ -18,36 +18,41 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/cnt32_to_63.h>
#include <linux/io.h>
#include <asm/div64.h>
#include <linux/sched.h>
#include <mach/hardware.h>
#include <mach/platform.h>
#include <asm/sched_clock.h>
#include <plat/sched_clock.h>
#ifdef VERSATILE_SYS_BASE
#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET)
#endif
#ifdef REALVIEW_SYS_BASE
#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
#endif
static DEFINE_CLOCK_DATA(cd);
static void __iomem *ctr;
/*
* This is the Realview and Versatile sched_clock implementation. This
* has a resolution of 41.7ns, and a maximum value of about 35583 days.
*
* The return value is guaranteed to be monotonic in that range as
* long as there is always less than 89 seconds between successive
* calls to this function.
* Constants generated by clocks_calc_mult_shift(m, s, 24MHz, NSEC_PER_SEC, 60).
* This gives a resolution of about 41ns and a wrap period of about 178s.
*/
unsigned long long sched_clock(void)
#define SC_MULT 2796202667u
#define SC_SHIFT 26
unsigned long long notrace sched_clock(void)
{
unsigned long long v = cnt32_to_63(readl(REFCOUNTER));
if (ctr) {
u32 cyc = readl(ctr);
return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0,
SC_MULT, SC_SHIFT);
} else
return 0;
}
/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
v *= 125<<1;
do_div(v, 3<<1);
static void notrace versatile_update_sched_clock(void)
{
u32 cyc = readl(ctr);
update_sched_clock(&cd, cyc, (u32)~0);
}
return v;
void __init versatile_sched_clock_init(void __iomem *reg, unsigned long rate)
{
ctr = reg;
init_fixed_sched_clock(&cd, versatile_update_sched_clock,
32, rate, SC_MULT, SC_SHIFT);
}
......@@ -152,6 +152,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
*/
for (sft = 32; sft > 0; sft--) {
tmp = (u64) to << sft;
tmp += from / 2;
do_div(tmp, from);
if ((tmp >> sftacc) == 0)
break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment