Commit 0ad5d703 authored by Ingo Molnar

Merge branch 'tracing/hw-branch-tracing' into tracing/core

Merge reason: this topic is ready for upstream now. It passed
              Oleg's review and Andrew had no further mm/*
              objections/observations either.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents 44347d94 1cb81b14
......@@ -506,7 +506,6 @@ config X86_PTRACE_BTS
bool "Branch Trace Store"
default y
depends on X86_DEBUGCTLMSR
depends on BROKEN
---help---
This adds a ptrace interface to the hardware's branch trace store.
......
......@@ -167,6 +167,15 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config X86_DS_SELFTEST
bool "DS selftest"
default y
depends on DEBUG_KERNEL
depends on X86_DS
---help---
Perform Debug Store selftests at boot time.
If in doubt, say "N".
config HAVE_MMIOTRACE_SUPPORT
def_bool y
......
......@@ -15,8 +15,8 @@
* - buffer allocation (memory accounting)
*
*
* Copyright (C) 2007-2008 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
* Copyright (C) 2007-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/
#ifndef _ASM_X86_DS_H
......@@ -83,8 +83,10 @@ enum ds_feature {
* The interrupt threshold is independent from the overflow callback
* to allow users to use their own overflow interrupt handling mechanism.
*
* task: the task to request recording for;
* NULL for per-cpu recording on the current cpu
* The function might sleep.
*
* task: the task to request recording for
* cpu: the cpu to request recording for
* base: the base pointer for the (non-pageable) buffer;
* size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow;
......@@ -93,11 +95,18 @@ enum ds_feature {
* -1 if no interrupt threshold is requested.
* flags: a bit-mask of the above flags
*/
extern struct bts_tracer *ds_request_bts(struct task_struct *task,
extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
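For illustration, a minimal caller sketch of the new per-cpu request interface (hypothetical client code, not part of this patch; the buffer size, cpu number, and flags are assumptions):

	static struct bts_tracer *example_tracer;
	static unsigned char example_buf[PAGE_SIZE];

	/* Trace kernel-mode branches on cpu 0, with no overflow callback
	 * and no interrupt threshold. The request may sleep. */
	static int example_start(void)
	{
		example_tracer = ds_request_bts_cpu(0, example_buf,
						    sizeof(example_buf),
						    NULL, (size_t)-1,
						    BTS_KERNEL);
		return IS_ERR(example_tracer) ? PTR_ERR(example_tracer) : 0;
	}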
......@@ -106,6 +115,8 @@ extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
* Must be called with irq's enabled.
*
* tracer: the tracer handle returned from ds_request_~()
*/
extern void ds_release_bts(struct bts_tracer *tracer);
......@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
extern void ds_suspend_pebs(struct pebs_tracer *tracer);
extern void ds_resume_pebs(struct pebs_tracer *tracer);
/*
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
* Cpu tracers must call this on the traced cpu.
* Task tracers must call ds_release_~_noirq() for themselves.
*
* May be called with irq's disabled.
*
* Returns 0 if successful;
* -EPERM if the cpu tracer does not trace the current cpu.
* -EPERM if the task tracer does not trace itself.
*
* tracer: the tracer handle returned from ds_request_~()
*/
extern int ds_release_bts_noirq(struct bts_tracer *tracer);
extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
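A sketch of the intended noirq usage (assumed caller context, mirroring the selftest further down): a cpu tracer must be released on the traced cpu, and the noirq variants tolerate disabled interrupts:

	/* Hypothetical teardown; ds_release_bts_noirq() returns -EPERM
	 * when called from a cpu other than the traced one. */
	static int example_stop(struct bts_tracer *tracer)
	{
		unsigned long flags;
		int err;

		local_irq_save(flags);
		err = ds_release_bts_noirq(tracer);
		local_irq_restore(flags);

		return err;
	}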
/*
* The raw DS buffer state as it is used for BTS and PEBS recording.
......@@ -170,9 +203,9 @@ struct bts_struct {
} lbr;
/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
struct {
__u64 jiffies;
__u64 clock;
pid_t pid;
} timestamp;
} event;
} variant;
};
......@@ -201,8 +234,12 @@ struct bts_trace {
struct pebs_trace {
struct ds_trace ds;
/* the PEBS reset value */
unsigned long long reset_value;
/* the number of valid counters in the below array */
unsigned int counters;
#define MAX_PEBS_COUNTERS 4
/* the counter reset value */
unsigned long long counter_reset[MAX_PEBS_COUNTERS];
};
......@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
* Returns 0 on success; -Eerrno on error
*
* tracer: the tracer handle returned from ds_request_pebs()
* counter: the index of the counter
* value: the new counter reset value
*/
extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
unsigned int counter, u64 value);
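For example, a hypothetical PEBS client would arm one of the (now up to four) counters like this; the tracer and sampling period are assumptions:

	/* Reload counter 0 with -period after each PEBS record, so the
	 * counter overflows again after "period" further events. */
	err = ds_set_pebs_reset(pebs_tracer, 0, (u64)-period);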
/*
* Initialization
......@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
*/
extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
/*
* Task clone/init and cleanup work
*/
extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
extern void ds_exit_thread(struct task_struct *tsk);
#else /* CONFIG_X86_DS */
struct cpuinfo_x86;
static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
static inline void ds_switch_to(struct task_struct *prev,
struct task_struct *next) {}
static inline void ds_copy_thread(struct task_struct *tsk,
struct task_struct *father) {}
static inline void ds_exit_thread(struct task_struct *tsk) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
......@@ -460,14 +460,8 @@ struct thread_struct {
unsigned io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
unsigned long debugctlmsr;
#ifdef CONFIG_X86_DS
/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
/* Debug Store context; see asm/ds.h */
struct ds_context *ds_ctx;
#endif /* CONFIG_X86_DS */
#ifdef CONFIG_X86_PTRACE_BTS
/* the signal to send on a bts buffer overflow */
unsigned int bts_ovfl_signal;
#endif /* CONFIG_X86_PTRACE_BTS */
};
static inline unsigned long native_get_debugreg(int regno)
......@@ -795,6 +789,21 @@ static inline unsigned long get_debugctlmsr(void)
return debugctlmsr;
}
static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
{
u64 debugctlmsr = 0;
u32 val1, val2;
#ifndef CONFIG_X86_DEBUGCTLMSR
if (boot_cpu_data.x86 < 6)
return 0;
#endif
rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
debugctlmsr = val1 | ((u64)val2 << 32);
return debugctlmsr;
}
static inline void update_debugctlmsr(unsigned long debugctlmsr)
{
#ifndef CONFIG_X86_DEBUGCTLMSR
......@@ -804,6 +813,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
static inline void update_debugctlmsr_on_cpu(int cpu,
unsigned long debugctlmsr)
{
#ifndef CONFIG_X86_DEBUGCTLMSR
if (boot_cpu_data.x86 < 6)
return;
#endif
wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
(u32)((u64)debugctlmsr),
(u32)((u64)debugctlmsr >> 32));
}
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:
......
......@@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
extern void x86_ptrace_untrace(struct task_struct *);
extern void x86_ptrace_fork(struct task_struct *child,
unsigned long clone_flags);
#ifdef CONFIG_X86_PTRACE_BTS
extern void ptrace_bts_untrace(struct task_struct *tsk);
#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk)
#endif /* CONFIG_X86_PTRACE_BTS */
#endif /* __KERNEL__ */
......
......@@ -44,6 +44,7 @@ obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
......
......@@ -19,45 +19,61 @@
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/
#include <asm/ds.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/trace_clock.h>
#include <asm/ds.h>
#include "ds_selftest.h"
/*
* The configuration for a particular DS hardware implementation.
* The configuration for a particular DS hardware implementation:
*/
struct ds_configuration {
/* the name of the configuration */
/* The name of the configuration: */
const char *name;
/* the size of one pointer-typed field in the DS structure and
in the BTS and PEBS buffers in bytes;
this covers the first 8 DS fields related to buffer management. */
unsigned char sizeof_field;
/* the size of a BTS/PEBS record in bytes */
/* The size of pointer-typed fields in DS, BTS, and PEBS: */
unsigned char sizeof_ptr_field;
/* The size of a BTS/PEBS record in bytes: */
unsigned char sizeof_rec[2];
/* a series of bit-masks to control various features indexed
* by enum ds_feature */
/* The number of pebs counter reset values in the DS structure. */
unsigned char nr_counter_reset;
/* Control bit-masks indexed by enum ds_feature: */
unsigned long ctl[dsf_ctl_max];
};
static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
static struct ds_configuration ds_cfg __read_mostly;
#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
/* Maximal size of a DS configuration: */
#define MAX_SIZEOF_DS 0x80
#define BTS_CONTROL \
(ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
ds_cfg.ctl[dsf_bts_overflow])
/* Maximal size of a BTS record: */
#define MAX_SIZEOF_BTS (3 * 8)
/* BTS and PEBS buffer alignment: */
#define DS_ALIGNMENT (1 << 3)
/* Number of buffer pointers in DS: */
#define NUM_DS_PTR_FIELDS 8
/* Size of a pebs reset value in DS: */
#define PEBS_RESET_FIELD_SIZE 8
/* Mask of control bits in the DS MSR register: */
#define BTS_CONTROL \
( ds_cfg.ctl[dsf_bts] | \
ds_cfg.ctl[dsf_bts_kernel] | \
ds_cfg.ctl[dsf_bts_user] | \
ds_cfg.ctl[dsf_bts_overflow] )
/*
* A BTS or PEBS tracer.
......@@ -66,28 +82,35 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
* to identify tracers.
*/
struct ds_tracer {
/* the DS context (partially) owned by this tracer */
/* The DS context (partially) owned by this tracer. */
struct ds_context *context;
/* the buffer provided on ds_request() and its size in bytes */
/* The buffer provided on ds_request() and its size in bytes. */
void *buffer;
size_t size;
};
struct bts_tracer {
/* the common DS part */
/* The common DS part: */
struct ds_tracer ds;
/* the trace including the DS configuration */
/* The trace including the DS configuration: */
struct bts_trace trace;
/* buffer overflow notification function */
/* Buffer overflow notification function: */
bts_ovfl_callback_t ovfl;
/* Active flags affecting trace collection. */
unsigned int flags;
};
struct pebs_tracer {
/* the common DS part */
/* The common DS part: */
struct ds_tracer ds;
/* the trace including the DS configuration */
/* The trace including the DS configuration: */
struct pebs_trace trace;
/* buffer overflow notification function */
/* Buffer overflow notification function: */
pebs_ovfl_callback_t ovfl;
};
......@@ -97,6 +120,7 @@ struct pebs_tracer {
*
* The DS configuration consists of the following fields; different
architectures vary in the size of those fields.
*
* - double-word aligned base linear address of the BTS buffer
* - write pointer into the BTS buffer
* - end linear address of the BTS buffer (one byte beyond the end of
......@@ -139,17 +163,18 @@ enum ds_qualifier {
ds_pebs
};
static inline unsigned long ds_get(const unsigned char *base,
enum ds_qualifier qual, enum ds_field field)
static inline unsigned long
ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
{
base += (ds_cfg.sizeof_field * (field + (4 * qual)));
base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
return *(unsigned long *)base;
}
static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
enum ds_field field, unsigned long value)
static inline void
ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
unsigned long value)
{
base += (ds_cfg.sizeof_field * (field + (4 * qual)));
base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
(*(unsigned long *)base) = value;
}
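To make the field arithmetic concrete, a worked example under the assumption of a 64-bit DS layout (sizeof_ptr_field == 8), where each qualifier owns four consecutive pointer-sized fields:

	/* ds_index (field 1) of ds_pebs (qualifier 1) lives at
	 * base + 8 * (1 + 4 * 1) = base + 40. */
	unsigned long pebs_index = ds_get(context->ds, ds_pebs, ds_index);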
......@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
*/
static DEFINE_SPINLOCK(ds_lock);
/*
* We either support (system-wide) per-cpu or per-thread allocation.
* We distinguish the two based on the task_struct pointer, where a
......@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock);
*/
static atomic_t tracers = ATOMIC_INIT(0);
static inline void get_tracer(struct task_struct *task)
static inline int get_tracer(struct task_struct *task)
{
if (task)
int error;
spin_lock_irq(&ds_lock);
if (task) {
error = -EPERM;
if (atomic_read(&tracers) < 0)
goto out;
atomic_inc(&tracers);
else
} else {
error = -EPERM;
if (atomic_read(&tracers) > 0)
goto out;
atomic_dec(&tracers);
}
error = 0;
out:
spin_unlock_irq(&ds_lock);
return error;
}
static inline void put_tracer(struct task_struct *task)
......@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task)
atomic_inc(&tracers);
}
static inline int check_tracer(struct task_struct *task)
{
return task ?
(atomic_read(&tracers) >= 0) :
(atomic_read(&tracers) <= 0);
}
/*
* The DS context is either attached to a thread or to a cpu:
* - in the former case, the thread_struct contains a pointer to the
......@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task)
* deallocated when the last user puts the context.
*/
struct ds_context {
/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
/* The DS configuration; goes into MSR_IA32_DS_AREA: */
unsigned char ds[MAX_SIZEOF_DS];
/* the owner of the BTS and PEBS configuration, respectively */
/* The owner of the BTS and PEBS configuration, respectively: */
struct bts_tracer *bts_master;
struct pebs_tracer *pebs_master;
/* use count */
/* Use count: */
unsigned long count;
/* a pointer to the context location inside the thread_struct
* or the per_cpu context array */
/* Pointer to the context pointer field: */
struct ds_context **this;
/* a pointer to the task owning this context, or NULL, if the
* context is owned by a cpu */
/* The traced task; NULL for cpu tracing: */
struct task_struct *task;
};
static DEFINE_PER_CPU(struct ds_context *, system_context_array);
/* The traced cpu; only valid if task is NULL: */
int cpu;
};
#define system_context per_cpu(system_context_array, smp_processor_id())
static DEFINE_PER_CPU(struct ds_context *, cpu_context);
static inline struct ds_context *ds_get_context(struct task_struct *task)
static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{
struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &system_context);
(task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
struct ds_context *context = NULL;
struct ds_context *new_context = NULL;
unsigned long irq;
/* Chances are small that we already have a context. */
new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
if (!new_context)
return NULL;
spin_lock_irqsave(&ds_lock, irq);
spin_lock_irq(&ds_lock);
context = *p_context;
if (!context) {
if (likely(!context)) {
context = new_context;
context->this = p_context;
context->task = task;
context->cpu = cpu;
context->count = 0;
if (task)
set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
if (!task || (task == current))
wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
*p_context = context;
}
context->count++;
spin_unlock_irqrestore(&ds_lock, irq);
spin_unlock_irq(&ds_lock);
if (context != new_context)
kfree(new_context);
......@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
return context;
}
static inline void ds_put_context(struct ds_context *context)
static void ds_put_context(struct ds_context *context)
{
struct task_struct *task;
unsigned long irq;
if (!context)
......@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context)
*(context->this) = NULL;
if (context->task)
clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
task = context->task;
if (task)
clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
if (!context->task || (context->task == current))
wrmsrl(MSR_IA32_DS_AREA, 0);
/*
* We leave the (now dangling) pointer to the DS configuration in
* the DS_AREA msr. This is as good or as bad as replacing it with
* NULL - the hardware would crash if we enabled tracing.
*
* This saves us some problems with having to write an msr on a
* different cpu while preventing others from doing the same for the
* next context for that same cpu.
*/
spin_unlock_irqrestore(&ds_lock, irq);
/* The context might still be in use for context switching. */
if (task && (task != current))
wait_task_context_switch(task);
kfree(context);
}
static void ds_install_ds_area(struct ds_context *context)
{
unsigned long ds;
ds = (unsigned long)context->ds;
/*
* There is a race between the bts master and the pebs master.
*
* The thread/cpu access is synchronized via get/put_cpu() for
* task tracing and via wrmsr_on_cpu for cpu tracing.
*
* If bts and pebs are collected for the same task or same cpu,
* the same configuration is written twice.
*/
if (context->task) {
get_cpu();
if (context->task == current)
wrmsrl(MSR_IA32_DS_AREA, ds);
set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
put_cpu();
} else
wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
(u32)((u64)ds), (u32)((u64)ds >> 32));
}
/*
* Call the tracer's callback on a buffer overflow.
......@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
unsigned long write_size, adj_write_size;
/*
* write as much as possible without producing an
* Write as much as possible without producing an
* overflow interrupt.
*
* interrupt_threshold must either be
* Interrupt_threshold must either be
* - bigger than absolute_maximum or
* - point to a record between buffer_base and absolute_maximum
*
* index points to a valid record.
* Index points to a valid record.
*/
base = ds_get(context->ds, qual, ds_buffer_base);
index = ds_get(context->ds, qual, ds_index);
......@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
write_end = min(end, int_th);
/* if we are already beyond the interrupt threshold,
* we fill the entire buffer */
/*
* If we are already beyond the interrupt threshold,
* we fill the entire buffer.
*/
if (write_end <= index)
write_end = end;
......@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
adj_write_size *= ds_cfg.sizeof_rec[qual];
/* zero out trailing bytes */
/* Zero out trailing bytes. */
memset((char *)index + write_size, 0,
adj_write_size - write_size);
index += adj_write_size;
......@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
* Later architectures use 64bit pointers throughout, whereas earlier
* architectures use 32bit pointers in 32bit mode.
*
* We compute the base address for the first 8 fields based on:
* We compute the base address for the fields based on:
* - the field size stored in the DS configuration
* - the relative field position
*
......@@ -432,7 +502,7 @@ enum bts_field {
bts_flags,
bts_qual = bts_from,
bts_jiffies = bts_to,
bts_clock = bts_to,
bts_pid = bts_flags,
bts_qual_mask = (bts_qual_max - 1),
......@@ -441,13 +511,13 @@ enum bts_field {
static inline unsigned long bts_get(const char *base, enum bts_field field)
{
base += (ds_cfg.sizeof_field * field);
base += (ds_cfg.sizeof_ptr_field * field);
return *(unsigned long *)base;
}
static inline void bts_set(char *base, enum bts_field field, unsigned long val)
{
base += (ds_cfg.sizeof_field * field);;
base += (ds_cfg.sizeof_ptr_field * field);
(*(unsigned long *)base) = val;
}
......@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val)
*
* return: bytes read/written on success; -Eerrno, otherwise
*/
static int bts_read(struct bts_tracer *tracer, const void *at,
struct bts_struct *out)
static int
bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
{
if (!tracer)
return -EINVAL;
......@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at,
memset(out, 0, sizeof(*out));
if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
out->variant.timestamp.pid = bts_get(at, bts_pid);
out->variant.event.clock = bts_get(at, bts_clock);
out->variant.event.pid = bts_get(at, bts_pid);
} else {
out->qualifier = bts_branch;
out->variant.lbr.from = bts_get(at, bts_from);
......@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
case bts_task_arrives:
case bts_task_departs:
bts_set(raw, bts_qual, (bts_escape | in->qualifier));
bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
bts_set(raw, bts_pid, in->variant.timestamp.pid);
bts_set(raw, bts_clock, in->variant.event.clock);
bts_set(raw, bts_pid, in->variant.event.pid);
break;
default:
return -EINVAL;
......@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
unsigned int flags) {
unsigned long buffer, adj;
/* adjust the buffer address and size to meet alignment
/*
* Adjust the buffer address and size to meet alignment
* constraints:
* - buffer is double-word aligned
* - size is multiple of record size
......@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
trace->begin = (void *)buffer;
trace->top = trace->begin;
trace->end = (void *)(buffer + size);
/* The value for 'no threshold' is -1, which will set the
/*
* The value for 'no threshold' is -1, which will set the
* threshold outside of the buffer, just like we want it.
*/
ith *= ds_cfg.sizeof_rec[qual];
trace->ith = (void *)(buffer + size - ith);
trace->flags = flags;
......@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
enum ds_qualifier qual, struct task_struct *task,
void *base, size_t size, size_t th, unsigned int flags)
int cpu, void *base, size_t size, size_t th)
{
struct ds_context *context;
int error;
size_t req_size;
error = -EOPNOTSUPP;
if (!ds_cfg.sizeof_rec[qual])
goto out;
error = -EINVAL;
if (!base)
goto out;
/* we require some space to do alignment adjustments below */
req_size = ds_cfg.sizeof_rec[qual];
/* We might need space for alignment adjustments. */
if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
req_size += DS_ALIGNMENT;
error = -EINVAL;
if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
if (size < req_size)
goto out;
if (th != (size_t)-1) {
......@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
tracer->size = size;
error = -ENOMEM;
context = ds_get_context(task);
context = ds_get_context(task, cpu);
if (!context)
goto out;
tracer->context = context;
ds_init_ds_trace(trace, qual, base, size, th, flags);
/*
* Defer any tracer-specific initialization work for the context until
* context ownership has been clarified.
*/
error = 0;
out:
return error;
}
struct bts_tracer *ds_request_bts(struct task_struct *task,
static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
void *base, size_t size,
bts_ovfl_callback_t ovfl, size_t th,
unsigned int flags)
{
struct bts_tracer *tracer;
unsigned long irq;
int error;
/* Buffer overflow notification is not yet implemented. */
error = -EOPNOTSUPP;
if (!ds_cfg.ctl[dsf_bts])
if (ovfl)
goto out;
/* buffer overflow notification is not yet implemented */
error = -EOPNOTSUPP;
if (ovfl)
error = get_tracer(task);
if (error < 0)
goto out;
error = -ENOMEM;
tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer)
goto out;
goto out_put_tracer;
tracer->ovfl = ovfl;
/* Do some more error checking and acquire a tracing context. */
error = ds_request(&tracer->ds, &tracer->trace.ds,
ds_bts, task, base, size, th, flags);
ds_bts, task, cpu, base, size, th);
if (error < 0)
goto out_tracer;
spin_lock_irqsave(&ds_lock, irq);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
get_tracer(task);
/* Claim the bts part of the tracing context we acquired above. */
spin_lock_irq(&ds_lock);
error = -EPERM;
if (tracer->ds.context->bts_master)
goto out_put_tracer;
goto out_unlock;
tracer->ds.context->bts_master = tracer;
spin_unlock_irqrestore(&ds_lock, irq);
spin_unlock_irq(&ds_lock);
/*
* Now that we own the bts part of the context, let's complete the
* initialization for that part.
*/
ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
ds_install_ds_area(tracer->ds.context);
tracer->trace.read = bts_read;
tracer->trace.write = bts_write;
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
/* Start tracing. */
ds_resume_bts(tracer);
return tracer;
out_put_tracer:
put_tracer(task);
out_unlock:
spin_unlock_irqrestore(&ds_lock, irq);
spin_unlock_irq(&ds_lock);
ds_put_context(tracer->ds.context);
out_tracer:
kfree(tracer);
out_put_tracer:
put_tracer(task);
out:
return ERR_PTR(error);
}
struct pebs_tracer *ds_request_pebs(struct task_struct *task,
struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_bts(task, 0, base, size, ovfl, th, flags);
}
struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
}
static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
void *base, size_t size,
pebs_ovfl_callback_t ovfl, size_t th,
unsigned int flags)
{
struct pebs_tracer *tracer;
unsigned long irq;
int error;
/* buffer overflow notification is not yet implemented */
/* Buffer overflow notification is not yet implemented. */
error = -EOPNOTSUPP;
if (ovfl)
goto out;
error = get_tracer(task);
if (error < 0)
goto out;
error = -ENOMEM;
tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer)
goto out;
goto out_put_tracer;
tracer->ovfl = ovfl;
/* Do some more error checking and acquire a tracing context. */
error = ds_request(&tracer->ds, &tracer->trace.ds,
ds_pebs, task, base, size, th, flags);
ds_pebs, task, cpu, base, size, th);
if (error < 0)
goto out_tracer;
spin_lock_irqsave(&ds_lock, irq);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
get_tracer(task);
/* Claim the pebs part of the tracing context we acquired above. */
spin_lock_irq(&ds_lock);
error = -EPERM;
if (tracer->ds.context->pebs_master)
goto out_put_tracer;
goto out_unlock;
tracer->ds.context->pebs_master = tracer;
spin_unlock_irqrestore(&ds_lock, irq);
spin_unlock_irq(&ds_lock);
/*
* Now that we own the pebs part of the context, let's complete the
* initialization for that part.
*/
ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
ds_install_ds_area(tracer->ds.context);
/* Start tracing. */
ds_resume_pebs(tracer);
return tracer;
out_put_tracer:
put_tracer(task);
out_unlock:
spin_unlock_irqrestore(&ds_lock, irq);
spin_unlock_irq(&ds_lock);
ds_put_context(tracer->ds.context);
out_tracer:
kfree(tracer);
out_put_tracer:
put_tracer(task);
out:
return ERR_PTR(error);
}
void ds_release_bts(struct bts_tracer *tracer)
struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
if (!tracer)
return;
return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
}
ds_suspend_bts(tracer);
struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
}
static void ds_free_bts(struct bts_tracer *tracer)
{
struct task_struct *task;
task = tracer->ds.context->task;
WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
tracer->ds.context->bts_master = NULL;
put_tracer(tracer->ds.context->task);
/* Make sure tracing stopped and the tracer is not in use. */
if (task && (task != current))
wait_task_context_switch(task);
ds_put_context(tracer->ds.context);
put_tracer(task);
kfree(tracer);
}
void ds_release_bts(struct bts_tracer *tracer)
{
might_sleep();
if (!tracer)
return;
ds_suspend_bts(tracer);
ds_free_bts(tracer);
}
int ds_release_bts_noirq(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long irq;
int error;
if (!tracer)
return 0;
task = tracer->ds.context->task;
local_irq_save(irq);
error = -EPERM;
if (!task &&
(tracer->ds.context->cpu != smp_processor_id()))
goto out;
error = -EPERM;
if (task && (task != current))
goto out;
ds_suspend_bts_noirq(tracer);
ds_free_bts(tracer);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static void update_task_debugctlmsr(struct task_struct *task,
unsigned long debugctlmsr)
{
task->thread.debugctlmsr = debugctlmsr;
get_cpu();
if (task == current)
update_debugctlmsr(debugctlmsr);
put_cpu();
}
void ds_suspend_bts(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long debugctlmsr;
int cpu;
if (!tracer)
return;
tracer->flags = 0;
task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
if (!task || (task == current))
update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
WARN_ON(!task && irqs_disabled());
if (task) {
task->thread.debugctlmsr &= ~BTS_CONTROL;
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr_on_cpu(cpu));
debugctlmsr &= ~BTS_CONTROL;
if (!task->thread.debugctlmsr)
clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
}
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}
void ds_resume_bts(struct bts_tracer *tracer)
int ds_suspend_bts_noirq(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long control;
unsigned long debugctlmsr, irq;
int cpu, error = 0;
if (!tracer)
return;
return 0;
tracer->flags = 0;
task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
local_irq_save(irq);
error = -EPERM;
if (!task && (cpu != smp_processor_id()))
goto out;
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr());
debugctlmsr &= ~BTS_CONTROL;
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr(debugctlmsr);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static unsigned long ds_bts_control(struct bts_tracer *tracer)
{
unsigned long control;
control = ds_cfg.ctl[dsf_bts];
if (!(tracer->trace.ds.flags & BTS_KERNEL))
......@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer)
if (!(tracer->trace.ds.flags & BTS_USER))
control |= ds_cfg.ctl[dsf_bts_user];
if (task) {
task->thread.debugctlmsr |= control;
set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
}
if (!task || (task == current))
update_debugctlmsr(get_debugctlmsr() | control);
return control;
}
void ds_release_pebs(struct pebs_tracer *tracer)
void ds_resume_bts(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long debugctlmsr;
int cpu;
if (!tracer)
return;
ds_suspend_pebs(tracer);
tracer->flags = tracer->trace.ds.flags;
task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
WARN_ON(!task && irqs_disabled());
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr_on_cpu(cpu));
debugctlmsr |= ds_bts_control(tracer);
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}
int ds_resume_bts_noirq(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long debugctlmsr, irq;
int cpu, error = 0;
if (!tracer)
return 0;
tracer->flags = tracer->trace.ds.flags;
task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
local_irq_save(irq);
error = -EPERM;
if (!task && (cpu != smp_processor_id()))
goto out;
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr());
debugctlmsr |= ds_bts_control(tracer);
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr(debugctlmsr);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static void ds_free_pebs(struct pebs_tracer *tracer)
{
struct task_struct *task;
task = tracer->ds.context->task;
WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
tracer->ds.context->pebs_master = NULL;
put_tracer(tracer->ds.context->task);
ds_put_context(tracer->ds.context);
put_tracer(task);
kfree(tracer);
}
void ds_release_pebs(struct pebs_tracer *tracer)
{
might_sleep();
if (!tracer)
return;
ds_suspend_pebs(tracer);
ds_free_pebs(tracer);
}
int ds_release_pebs_noirq(struct pebs_tracer *tracer)
{
struct task_struct *task;
unsigned long irq;
int error;
if (!tracer)
return 0;
task = tracer->ds.context->task;
local_irq_save(irq);
error = -EPERM;
if (!task &&
(tracer->ds.context->cpu != smp_processor_id()))
goto out;
error = -EPERM;
if (task && (task != current))
goto out;
ds_suspend_pebs_noirq(tracer);
ds_free_pebs(tracer);
error = 0;
out:
local_irq_restore(irq);
return error;
}
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}
int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
return 0;
}
void ds_resume_pebs(struct pebs_tracer *tracer)
{
}
int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
return 0;
}
const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{
if (!tracer)
......@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
return NULL;
ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
tracer->trace.reset_value =
*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
tracer->trace.counters = ds_cfg.nr_counter_reset;
memcpy(tracer->trace.counter_reset,
tracer->ds.context->ds +
(NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
return &tracer->trace;
}
......@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
tracer->trace.ds.top = tracer->trace.ds.begin;
ds_set(tracer->ds.context->ds, ds_bts, ds_index,
ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
(unsigned long)tracer->trace.ds.top);
return 0;
}
int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
int ds_set_pebs_reset(struct pebs_tracer *tracer,
unsigned int counter, u64 value)
{
if (!tracer)
return -EINVAL;
*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
if (ds_cfg.nr_counter_reset <= counter)
return -EINVAL;
*(u64 *)(tracer->ds.context->ds +
(NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
(counter * PEBS_RESET_FIELD_SIZE)) = value;
return 0;
}
......@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = {
.ctl[dsf_bts] = (1 << 2) | (1 << 3),
.ctl[dsf_bts_kernel] = (1 << 5),
.ctl[dsf_bts_user] = (1 << 6),
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10,
#else
.sizeof_rec[ds_pebs] = sizeof(long) * 18,
#endif
.nr_counter_reset = 1,
};
static const struct ds_configuration ds_cfg_pentium_m = {
.name = "Pentium M",
.ctl[dsf_bts] = (1 << 6) | (1 << 7),
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10,
#else
.sizeof_rec[ds_pebs] = sizeof(long) * 18,
#endif
.nr_counter_reset = 1,
};
static const struct ds_configuration ds_cfg_core2_atom = {
.name = "Core 2/Atom",
.ctl[dsf_bts] = (1 << 6) | (1 << 7),
.ctl[dsf_bts_kernel] = (1 << 9),
.ctl[dsf_bts_user] = (1 << 10),
.sizeof_field = 8,
.sizeof_rec[ds_bts] = 8 * 3,
.sizeof_rec[ds_pebs] = 8 * 18,
.nr_counter_reset = 1,
};
static const struct ds_configuration ds_cfg_core_i7 = {
.name = "Core i7",
.ctl[dsf_bts] = (1 << 6) | (1 << 7),
.ctl[dsf_bts_kernel] = (1 << 9),
.ctl[dsf_bts_user] = (1 << 10),
.nr_counter_reset = 4,
};
static void
ds_configure(const struct ds_configuration *cfg)
ds_configure(const struct ds_configuration *cfg,
struct cpuinfo_x86 *cpu)
{
unsigned long nr_pebs_fields = 0;
printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
#ifdef __i386__
nr_pebs_fields = 10;
#else
nr_pebs_fields = 18;
#endif
/*
* Starting with version 2, architectural performance
* monitoring supports a format specifier.
*/
if ((cpuid_eax(0xa) & 0xff) > 1) {
unsigned long perf_capabilities, format;
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
format = (perf_capabilities >> 8) & 0xf;
switch (format) {
case 0:
nr_pebs_fields = 18;
break;
case 1:
nr_pebs_fields = 22;
break;
default:
printk(KERN_INFO
"[ds] unknown PEBS format: %lu\n", format);
nr_pebs_fields = 0;
break;
}
}
memset(&ds_cfg, 0, sizeof(ds_cfg));
ds_cfg = *cfg;
printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
ds_cfg.sizeof_ptr_field =
(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
if (!cpu_has_bts) {
ds_cfg.ctl[dsf_bts] = 0;
if (!cpu_has(cpu, X86_FEATURE_BTS)) {
ds_cfg.sizeof_rec[ds_bts] = 0;
printk(KERN_INFO "[ds] bts not available\n");
}
if (!cpu_has_pebs)
if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
ds_cfg.sizeof_rec[ds_pebs] = 0;
printk(KERN_INFO "[ds] pebs not available\n");
}
WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
printk(KERN_INFO "[ds] sizes: address: %u bit, ",
8 * ds_cfg.sizeof_ptr_field);
printk("bts/pebs record: %u/%u bytes\n",
ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
}
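As an illustration of the sizes this computes (assuming a Core2-class cpu with DTES64, so sizeof_ptr_field == 8):

	/*
	 * A BTS record is 3 pointer-sized fields: 8 * 3 == 24 bytes, which
	 * is why the selftest's SMALL_BUFFER_SIZE of 24 holds exactly one
	 * record. A PEBS record in format 0 is 8 * 18 == 144 bytes.
	 */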
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
/* Only configure the first cpu. Others are identical. */
if (ds_cfg.name)
return;
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
case 0x9:
case 0xd: /* Pentium M */
ds_configure(&ds_cfg_pentium_m);
ds_configure(&ds_cfg_pentium_m, c);
break;
case 0xf:
case 0x17: /* Core2 */
case 0x1c: /* Atom */
ds_configure(&ds_cfg_core2_atom);
ds_configure(&ds_cfg_core2_atom, c);
break;
case 0x1a: /* Core i7 */
ds_configure(&ds_cfg_core_i7, c);
break;
case 0x1a: /* i7 */
default:
/* sorry, don't know about them */
/* Sorry, don't know about them. */
break;
}
break;
......@@ -969,19 +1349,40 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
case 0x0:
case 0x1:
case 0x2: /* Netburst */
ds_configure(&ds_cfg_netburst);
ds_configure(&ds_cfg_netburst, c);
break;
default:
/* sorry, don't know about them */
/* Sorry, don't know about them. */
break;
}
break;
default:
/* sorry, don't know about them */
/* Sorry, don't know about them. */
break;
}
}
static inline void ds_take_timestamp(struct ds_context *context,
enum bts_qualifier qualifier,
struct task_struct *task)
{
struct bts_tracer *tracer = context->bts_master;
struct bts_struct ts;
/* Prevent compilers from reading the tracer pointer twice. */
barrier();
if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
return;
memset(&ts, 0, sizeof(ts));
ts.qualifier = qualifier;
ts.variant.event.clock = trace_clock_global();
ts.variant.event.pid = task->pid;
bts_write(tracer, &ts);
}
/*
* Change the DS configuration from tracing prev to tracing next.
*/
......@@ -989,44 +1390,48 @@ void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{
struct ds_context *prev_ctx = prev->thread.ds_ctx;
struct ds_context *next_ctx = next->thread.ds_ctx;
unsigned long debugctlmsr = next->thread.debugctlmsr;
/* Make sure all data is read before we start. */
barrier();
if (prev_ctx) {
update_debugctlmsr(0);
if (prev_ctx->bts_master &&
(prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
struct bts_struct ts = {
.qualifier = bts_task_departs,
.variant.timestamp.jiffies = jiffies_64,
.variant.timestamp.pid = prev->pid
};
bts_write(prev_ctx->bts_master, &ts);
}
ds_take_timestamp(prev_ctx, bts_task_departs, prev);
}
if (next_ctx) {
if (next_ctx->bts_master &&
(next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
struct bts_struct ts = {
.qualifier = bts_task_arrives,
.variant.timestamp.jiffies = jiffies_64,
.variant.timestamp.pid = next->pid
};
bts_write(next_ctx->bts_master, &ts);
}
ds_take_timestamp(next_ctx, bts_task_arrives, next);
wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
}
update_debugctlmsr(next->thread.debugctlmsr);
update_debugctlmsr(debugctlmsr);
}
void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
static __init int ds_selftest(void)
{
clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
tsk->thread.ds_ctx = NULL;
}
if (ds_cfg.sizeof_rec[ds_bts]) {
int error;
void ds_exit_thread(struct task_struct *tsk)
{
error = ds_selftest_bts();
if (error) {
WARN(1, "[ds] selftest failed. disabling bts.\n");
ds_cfg.sizeof_rec[ds_bts] = 0;
}
}
if (ds_cfg.sizeof_rec[ds_pebs]) {
int error;
error = ds_selftest_pebs();
if (error) {
WARN(1, "[ds] selftest failed. disabling pebs.\n");
ds_cfg.sizeof_rec[ds_pebs] = 0;
}
}
return 0;
}
device_initcall(ds_selftest);
/*
* Debug Store support - selftest
*
*
* Copyright (C) 2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2009
*/
#include "ds_selftest.h"
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <asm/ds.h>
#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */
#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */
struct ds_selftest_bts_conf {
struct bts_tracer *tracer;
int error;
int (*suspend)(struct bts_tracer *);
int (*resume)(struct bts_tracer *);
};
static int ds_selftest_bts_consistency(const struct bts_trace *trace)
{
int error = 0;
if (!trace) {
printk(KERN_CONT "failed to access trace...");
/* Bail out. Other tests are pointless. */
return -1;
}
if (!trace->read) {
printk(KERN_CONT "bts read not available...");
error = -1;
}
/* Do some sanity checks on the trace configuration. */
if (!trace->ds.n) {
printk(KERN_CONT "empty bts buffer...");
error = -1;
}
if (!trace->ds.size) {
printk(KERN_CONT "bad bts trace setup...");
error = -1;
}
if (trace->ds.end !=
(char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
printk(KERN_CONT "bad bts buffer setup...");
error = -1;
}
/*
* We allow top in [begin; end], since it's not clear when the
* overflow adjustment happens: after the increment or before the
* write.
*/
if ((trace->ds.top < trace->ds.begin) ||
(trace->ds.end < trace->ds.top)) {
printk(KERN_CONT "bts top out of bounds...");
error = -1;
}
return error;
}
static int ds_selftest_bts_read(struct bts_tracer *tracer,
const struct bts_trace *trace,
const void *from, const void *to)
{
const unsigned char *at;
/*
* Check a few things which do not belong to this test.
* They should be covered by other tests.
*/
if (!trace)
return -1;
if (!trace->read)
return -1;
if (to < from)
return -1;
if (from < trace->ds.begin)
return -1;
if (trace->ds.end < to)
return -1;
if (!trace->ds.size)
return -1;
/* Now to the test itself. */
for (at = from; (void *)at < to; at += trace->ds.size) {
struct bts_struct bts;
unsigned long index;
int error;
if (((void *)at - trace->ds.begin) % trace->ds.size) {
printk(KERN_CONT
"read from non-integer index...");
return -1;
}
index = ((void *)at - trace->ds.begin) / trace->ds.size;
memset(&bts, 0, sizeof(bts));
error = trace->read(tracer, at, &bts);
if (error < 0) {
printk(KERN_CONT
"error reading bts trace at [%lu] (0x%p)...",
index, at);
return error;
}
switch (bts.qualifier) {
case BTS_BRANCH:
break;
default:
printk(KERN_CONT
"unexpected bts entry %llu at [%lu] (0x%p)...",
bts.qualifier, index, at);
return -1;
}
}
return 0;
}
static void ds_selftest_bts_cpu(void *arg)
{
struct ds_selftest_bts_conf *conf = arg;
const struct bts_trace *trace;
void *top;
if (IS_ERR(conf->tracer)) {
conf->error = PTR_ERR(conf->tracer);
conf->tracer = NULL;
printk(KERN_CONT
"initialization failed (err: %d)...", conf->error);
return;
}
/* We should meanwhile have enough trace. */
conf->error = conf->suspend(conf->tracer);
if (conf->error < 0)
return;
/* Let's see if we can access the trace. */
trace = ds_read_bts(conf->tracer);
conf->error = ds_selftest_bts_consistency(trace);
if (conf->error < 0)
return;
/* If everything went well, we should have a few trace entries. */
if (trace->ds.top == trace->ds.begin) {
/*
* It is possible but highly unlikely that we got a
* buffer overflow and end up at exactly the same
* position we started from.
* Let's issue a warning, but continue.
*/
printk(KERN_CONT "no trace/overflow...");
}
/* Let's try to read the trace we collected. */
conf->error =
ds_selftest_bts_read(conf->tracer, trace,
trace->ds.begin, trace->ds.top);
if (conf->error < 0)
return;
/*
* Let's read the trace again.
* Since we suspended tracing, we should get the same result.
*/
top = trace->ds.top;
trace = ds_read_bts(conf->tracer);
conf->error = ds_selftest_bts_consistency(trace);
if (conf->error < 0)
return;
if (top != trace->ds.top) {
printk(KERN_CONT "suspend not working...");
conf->error = -1;
return;
}
/* Let's collect some more trace - see if resume is working. */
conf->error = conf->resume(conf->tracer);
if (conf->error < 0)
return;
conf->error = conf->suspend(conf->tracer);
if (conf->error < 0)
return;
trace = ds_read_bts(conf->tracer);
conf->error = ds_selftest_bts_consistency(trace);
if (conf->error < 0)
return;
if (trace->ds.top == top) {
/*
* It is possible but highly unlikely that we got a
* buffer overflow and end up at exactly the same
* position we started from.
* Let's issue a warning and check the full trace.
*/
printk(KERN_CONT
"no resume progress/overflow...");
conf->error =
ds_selftest_bts_read(conf->tracer, trace,
trace->ds.begin, trace->ds.end);
} else if (trace->ds.top < top) {
/*
* We had a buffer overflow - the entire buffer should
* contain trace records.
*/
conf->error =
ds_selftest_bts_read(conf->tracer, trace,
trace->ds.begin, trace->ds.end);
} else {
/*
* It is quite likely that the buffer did not overflow.
* Let's just check the delta trace.
*/
conf->error =
ds_selftest_bts_read(conf->tracer, trace, top,
trace->ds.top);
}
if (conf->error < 0)
return;
conf->error = 0;
}
static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
{
ds_suspend_bts(tracer);
return 0;
}
static int ds_resume_bts_wrap(struct bts_tracer *tracer)
{
ds_resume_bts(tracer);
return 0;
}
static void ds_release_bts_noirq_wrap(void *tracer)
{
(void)ds_release_bts_noirq(tracer);
}
static int ds_selftest_bts_bad_release_noirq(int cpu,
struct bts_tracer *tracer)
{
int error = -EPERM;
/* Try to release the tracer on the wrong cpu. */
get_cpu();
if (cpu != smp_processor_id()) {
error = ds_release_bts_noirq(tracer);
if (error != -EPERM)
printk(KERN_CONT "release on wrong cpu...");
}
put_cpu();
return error ? 0 : -1;
}
static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
{
struct bts_tracer *tracer;
int error;
/* Try to request cpu tracing while task tracing is active. */
tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
(size_t)-1, BTS_KERNEL);
error = PTR_ERR(tracer);
if (!IS_ERR(tracer)) {
ds_release_bts(tracer);
error = 0;
}
if (error != -EPERM)
printk(KERN_CONT "cpu/task tracing overlap...");
return error ? 0 : -1;
}
static int ds_selftest_bts_bad_request_task(void *buffer)
{
struct bts_tracer *tracer;
int error;
/* Try to request task tracing while cpu tracing is active. */
tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
(size_t)-1, BTS_KERNEL);
error = PTR_ERR(tracer);
if (!IS_ERR(tracer)) {
error = 0;
ds_release_bts(tracer);
}
if (error != -EPERM)
printk(KERN_CONT "task/cpu tracing overlap...");
return error ? 0 : -1;
}
int ds_selftest_bts(void)
{
struct ds_selftest_bts_conf conf;
unsigned char buffer[BUFFER_SIZE], *small_buffer;
unsigned long irq;
int cpu;
printk(KERN_INFO "[ds] bts selftest...");
conf.error = 0;
small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
get_online_cpus();
for_each_online_cpu(cpu) {
conf.suspend = ds_suspend_bts_wrap;
conf.resume = ds_resume_bts_wrap;
conf.tracer =
ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
ds_selftest_bts_cpu(&conf);
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_task(buffer);
ds_release_bts(conf.tracer);
if (conf.error < 0)
goto out;
conf.suspend = ds_suspend_bts_noirq;
conf.resume = ds_resume_bts_noirq;
conf.tracer =
ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
if (conf.error >= 0) {
conf.error =
ds_selftest_bts_bad_release_noirq(cpu,
conf.tracer);
/* We must not release the tracer twice. */
if (conf.error < 0)
conf.tracer = NULL;
}
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_task(buffer);
smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
conf.tracer, 1);
if (conf.error < 0)
goto out;
}
conf.suspend = ds_suspend_bts_wrap;
conf.resume = ds_resume_bts_wrap;
conf.tracer =
ds_request_bts_task(current, buffer, BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
ds_selftest_bts_cpu(&conf);
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
ds_release_bts(conf.tracer);
if (conf.error < 0)
goto out;
conf.suspend = ds_suspend_bts_noirq;
conf.resume = ds_resume_bts_noirq;
conf.tracer =
ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
local_irq_save(irq);
ds_selftest_bts_cpu(&conf);
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
ds_release_bts_noirq(conf.tracer);
local_irq_restore(irq);
if (conf.error < 0)
goto out;
conf.error = 0;
out:
put_online_cpus();
printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
return conf.error;
}
int ds_selftest_pebs(void)
{
return 0;
}
/*
* Debug Store support - selftest
*
*
* Copyright (C) 2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2009
*/
#ifdef CONFIG_X86_DS_SELFTEST
extern int ds_selftest_bts(void);
extern int ds_selftest_pebs(void);
#else
static inline int ds_selftest_bts(void) { return 0; }
static inline int ds_selftest_pebs(void) { return 0; }
#endif
......@@ -14,6 +14,7 @@
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
......@@ -45,6 +46,8 @@ void free_thread_xstate(struct task_struct *tsk)
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
void free_thread_info(struct thread_info *ti)
......@@ -83,8 +86,6 @@ void exit_thread(void)
put_cpu();
kfree(bp);
}
ds_exit_thread(current);
}
void flush_thread(void)
......
......@@ -290,7 +290,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.io_bitmap_max = 0;
}
ds_copy_thread(p, current);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
p->thread.debugctlmsr = 0;
......
......@@ -335,7 +335,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
goto out;
}
ds_copy_thread(p, me);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
p->thread.debugctlmsr = 0;
......
......@@ -21,6 +21,7 @@
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/workqueue.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target,
}
#ifdef CONFIG_X86_PTRACE_BTS
/*
* A branch trace store context.
*
* Contexts may only be installed by ptrace_bts_config() and only for
* ptraced tasks.
*
* Contexts are destroyed when the tracee is detached from the tracer.
* The actual destruction work requires interrupts enabled, so the
* work is deferred and will be scheduled during __ptrace_unlink().
*
* Contexts hold an additional task_struct reference on the traced
* task, as well as a reference on the tracer's mm.
*
* Ptrace already holds a task_struct for the duration of ptrace operations,
* but since destruction is deferred, it may be executed after both
* tracer and tracee have exited.
*/
struct bts_context {
/* The branch trace handle. */
struct bts_tracer *tracer;
/* The buffer used to store the branch trace and its size. */
void *buffer;
unsigned int size;
/* The mm that paid for the above buffer. */
struct mm_struct *mm;
/* The task this context belongs to. */
struct task_struct *task;
/* The signal to send on a bts buffer overflow. */
unsigned int bts_ovfl_signal;
/* The work struct to destroy a context. */
struct work_struct work;
};
static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
{
void *buffer = NULL;
int err = -ENOMEM;
err = account_locked_memory(current->mm, current->signal->rlim, size);
if (err < 0)
return err;
buffer = kzalloc(size, GFP_KERNEL);
if (!buffer)
goto out_refund;
context->buffer = buffer;
context->size = size;
context->mm = get_task_mm(current);
return 0;
out_refund:
refund_locked_memory(current->mm, size);
return err;
}
static inline void free_bts_buffer(struct bts_context *context)
{
if (!context->buffer)
return;
kfree(context->buffer);
context->buffer = NULL;
refund_locked_memory(context->mm, context->size);
context->size = 0;
mmput(context->mm);
context->mm = NULL;
}
static void free_bts_context_work(struct work_struct *w)
{
struct bts_context *context;
context = container_of(w, struct bts_context, work);
ds_release_bts(context->tracer);
put_task_struct(context->task);
free_bts_buffer(context);
kfree(context);
}
static inline void free_bts_context(struct bts_context *context)
{
INIT_WORK(&context->work, free_bts_context_work);
schedule_work(&context->work);
}
static inline struct bts_context *alloc_bts_context(struct task_struct *task)
{
struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
if (context) {
context->task = task;
task->bts = context;
get_task_struct(task);
}
return context;
}
static int ptrace_bts_read_record(struct task_struct *child, size_t index,
struct bts_struct __user *out)
{
struct bts_context *context;
const struct bts_trace *trace;
struct bts_struct bts;
const unsigned char *at;
int error;
trace = ds_read_bts(child->bts);
context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace)
return -EPERM;
return -ESRCH;
at = trace->ds.top - ((index + 1) * trace->ds.size);
if ((void *)at < trace->ds.begin)
......@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
if (!trace->read)
return -EOPNOTSUPP;
error = trace->read(child->bts, at, &bts);
error = trace->read(context->tracer, at, &bts);
if (error < 0)
return error;
......@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child,
long size,
struct bts_struct __user *out)
{
struct bts_context *context;
const struct bts_trace *trace;
const unsigned char *at;
int error, drained = 0;
trace = ds_read_bts(child->bts);
context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace)
return -EPERM;
return -ESRCH;
if (!trace->read)
return -EOPNOTSUPP;
......@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child,
for (at = trace->ds.begin; (void *)at < trace->ds.top;
out++, drained++, at += trace->ds.size) {
struct bts_struct bts;
int error;
error = trace->read(child->bts, at, &bts);
error = trace->read(context->tracer, at, &bts);
if (error < 0)
return error;
......@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child,
memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
error = ds_reset_bts(child->bts);
error = ds_reset_bts(context->tracer);
if (error < 0)
return error;
return drained;
}
static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
{
child->bts_buffer = alloc_locked_buffer(size);
if (!child->bts_buffer)
return -ENOMEM;
child->bts_size = size;
return 0;
}
static void ptrace_bts_free_buffer(struct task_struct *child)
{
free_locked_buffer(child->bts_buffer, child->bts_size);
child->bts_buffer = NULL;
child->bts_size = 0;
}
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
{
struct bts_context *context;
struct ptrace_bts_config cfg;
unsigned int flags = 0;
......@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child,
if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
return -EFAULT;
if (child->bts) {
ds_release_bts(child->bts);
child->bts = NULL;
}
context = child->bts;
if (!context)
context = alloc_bts_context(child);
if (!context)
return -ENOMEM;
if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
if (!cfg.signal)
return -EINVAL;
child->thread.bts_ovfl_signal = cfg.signal;
return -EOPNOTSUPP;
context->bts_ovfl_signal = cfg.signal;
}
if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
(cfg.size != child->bts_size)) {
int error;
ds_release_bts(context->tracer);
context->tracer = NULL;
ptrace_bts_free_buffer(child);
if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
int err;
error = ptrace_bts_allocate_buffer(child, cfg.size);
if (error < 0)
return error;
free_bts_buffer(context);
if (!cfg.size)
return 0;
err = alloc_bts_buffer(context, cfg.size);
if (err < 0)
return err;
}
if (cfg.flags & PTRACE_BTS_O_TRACE)
......@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child,
if (cfg.flags & PTRACE_BTS_O_SCHED)
flags |= BTS_TIMESTAMPS;
child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
/* ovfl = */ NULL, /* th = */ (size_t)-1,
flags);
if (IS_ERR(child->bts)) {
int error = PTR_ERR(child->bts);
ptrace_bts_free_buffer(child);
child->bts = NULL;
context->tracer =
ds_request_bts_task(child, context->buffer, context->size,
NULL, (size_t)-1, flags);
if (unlikely(IS_ERR(context->tracer))) {
int error = PTR_ERR(context->tracer);
free_bts_buffer(context);
context->tracer = NULL;
return error;
}
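Editor's note: the matching userspace call, as a hedged sketch. The ptrace_bts_config fields and PTRACE_BTS_O_* flag names are taken from the code above; the header and argument order are assumptions:

	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <asm/ptrace-abi.h>	/* PTRACE_BTS_CONFIG, struct ptrace_bts_config */

	static long enable_bts(pid_t pid, unsigned int bytes)
	{
		struct ptrace_bts_config cfg = {
			.size  = bytes,		/* triggers (re)allocation with O_ALLOC */
			.flags = PTRACE_BTS_O_ALLOC | PTRACE_BTS_O_TRACE,
		};

		return ptrace(PTRACE_BTS_CONFIG, pid, &cfg, sizeof(cfg));
	}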
......@@ -727,19 +832,24 @@ static int ptrace_bts_status(struct task_struct *child,
long cfg_size,
struct ptrace_bts_config __user *ucfg)
{
struct bts_context *context;
const struct bts_trace *trace;
struct ptrace_bts_config cfg;
context = child->bts;
if (!context)
return -ESRCH;
if (cfg_size < sizeof(cfg))
return -EIO;
trace = ds_read_bts(child->bts);
trace = ds_read_bts(context->tracer);
if (!trace)
return -EPERM;
return -ESRCH;
memset(&cfg, 0, sizeof(cfg));
cfg.size = trace->ds.end - trace->ds.begin;
cfg.signal = child->thread.bts_ovfl_signal;
cfg.signal = context->bts_ovfl_signal;
cfg.bts_size = sizeof(struct bts_struct);
if (cfg.signal)
......@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child,
static int ptrace_bts_clear(struct task_struct *child)
{
struct bts_context *context;
const struct bts_trace *trace;
trace = ds_read_bts(child->bts);
context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace)
return -EPERM;
return -ESRCH;
memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
return ds_reset_bts(child->bts);
return ds_reset_bts(context->tracer);
}
static int ptrace_bts_size(struct task_struct *child)
{
struct bts_context *context;
const struct bts_trace *trace;
trace = ds_read_bts(child->bts);
context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace)
return -EPERM;
return -ESRCH;
return (trace->ds.top - trace->ds.begin) / trace->ds.size;
}
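Editor's note: reading a single record back pairs naturally with the size query. In this hedged sketch, index 0 names the most recently written record, which follows from the top-relative address computation in ptrace_bts_read_record() above; the argument order is again an assumption:

	/* Fetch the newest BTS record from a stopped tracee. */
	static long read_newest_bts(pid_t pid, struct bts_struct *bts)
	{
		long n = ptrace(PTRACE_BTS_SIZE, pid, NULL, NULL);

		if (n <= 0)
			return n ? n : -1;
		return ptrace(PTRACE_BTS_GET, pid, bts, 0 /* index */);
	}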
static void ptrace_bts_fork(struct task_struct *tsk)
{
tsk->bts = NULL;
tsk->bts_buffer = NULL;
tsk->bts_size = 0;
tsk->thread.bts_ovfl_signal = 0;
}
static void ptrace_bts_untrace(struct task_struct *child)
/*
* Called from __ptrace_unlink() after the child has been moved back
* to its original parent.
*/
void ptrace_bts_untrace(struct task_struct *child)
{
if (unlikely(child->bts)) {
ds_release_bts(child->bts);
free_bts_context(child->bts);
child->bts = NULL;
/* We cannot update total_vm and locked_vm since the
child's mm is already gone. But we can reclaim the
memory. */
kfree(child->bts_buffer);
child->bts_buffer = NULL;
child->bts_size = 0;
}
}
static void ptrace_bts_detach(struct task_struct *child)
{
/*
* Ptrace_detach() races with ptrace_untrace() in case
* the child dies and is reaped by another thread.
*
* We only do the memory accounting at this point and
* leave the buffer deallocation and the bts tracer
* release to ptrace_bts_untrace() which will be called
* later on with tasklist_lock held.
*/
release_locked_buffer(child->bts_buffer, child->bts_size);
}
#else
static inline void ptrace_bts_fork(struct task_struct *tsk) {}
static inline void ptrace_bts_detach(struct task_struct *child) {}
static inline void ptrace_bts_untrace(struct task_struct *child) {}
#endif /* CONFIG_X86_PTRACE_BTS */
void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
{
ptrace_bts_fork(child);
}
void x86_ptrace_untrace(struct task_struct *child)
{
ptrace_bts_untrace(child);
}
/*
* Called by kernel/ptrace.c when detaching..
*
......@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child)
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
ptrace_bts_detach(child);
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
......
......@@ -19,6 +19,7 @@ struct anon_vma;
struct file_ra_state;
struct user_struct;
struct writeback_control;
struct rlimit;
#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;
......@@ -1319,8 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page,
int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
void vmemmap_populate_print_last(void);
extern void *alloc_locked_buffer(size_t size);
extern void free_locked_buffer(void *buffer, size_t size);
extern void release_locked_buffer(void *buffer, size_t size);
extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
size_t size);
extern void refund_locked_memory(struct mm_struct *mm, size_t size);
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
......@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent);
extern void __ptrace_unlink(struct task_struct *child);
extern void exit_ptrace(struct task_struct *tracer);
extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
#define PTRACE_MODE_READ 1
#define PTRACE_MODE_ATTACH 2
/* Returns 0 on success, -errno on denial. */
......@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task)
#define arch_ptrace_untrace(task) do { } while (0)
#endif
#ifndef arch_ptrace_fork
/*
* Do machine-specific work to initialize a new task.
*
* This is called from copy_process().
*/
#define arch_ptrace_fork(child, clone_flags) do { } while (0)
#endif
extern int task_current_syscall(struct task_struct *target, long *callno,
unsigned long args[6], unsigned int maxargs,
unsigned long *sp, unsigned long *pc);
......
......@@ -96,8 +96,8 @@ struct exec_domain;
struct futex_pi_state;
struct robust_list_head;
struct bio;
struct bts_tracer;
struct fs_struct;
struct bts_context;
/*
* List of flags we want to share for kernel threads,
......@@ -1209,18 +1209,11 @@ struct task_struct {
struct list_head ptraced;
struct list_head ptrace_entry;
#ifdef CONFIG_X86_PTRACE_BTS
/*
* This is the tracer handle for the ptrace BTS extension.
* This field actually belongs to the ptracer task.
*/
struct bts_tracer *bts;
/*
* The buffer to hold the BTS data.
*/
void *bts_buffer;
size_t bts_size;
#endif /* CONFIG_X86_PTRACE_BTS */
struct bts_context *bts;
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
......@@ -2003,8 +1996,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
extern char *get_task_comm(char *to, struct task_struct *tsk);
#ifdef CONFIG_SMP
extern void wait_task_context_switch(struct task_struct *p);
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else
static inline void wait_task_context_switch(struct task_struct *p) {}
static inline unsigned long wait_task_inactive(struct task_struct *p,
long match_state)
{
......
......@@ -93,6 +93,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
......
......@@ -1088,8 +1088,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
if (unlikely(current->ptrace))
ptrace_fork(p, clone_flags);
p->bts = NULL;
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
......
......@@ -24,16 +24,6 @@
#include <linux/uaccess.h>
/*
* Initialize a new task whose father had been ptraced.
*
* Called from copy_process().
*/
void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
{
arch_ptrace_fork(child, clone_flags);
}
/*
* ptrace a task: make the debugger its new parent and
* move it to the ptrace list.
......
......@@ -2010,6 +2010,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
return 1;
}
/*
* wait_task_context_switch - wait for a thread to complete at least one
* context switch.
*
* @p must not be current.
*/
void wait_task_context_switch(struct task_struct *p)
{
unsigned long nvcsw, nivcsw, flags;
int running;
struct rq *rq;
nvcsw = p->nvcsw;
nivcsw = p->nivcsw;
for (;;) {
/*
* The runqueue is assigned before the actual context
* switch. We need to take the runqueue lock.
*
* We could check initially without the lock but it is
* very likely that we need to take the lock in every
* iteration.
*/
rq = task_rq_lock(p, &flags);
running = task_running(rq, p);
task_rq_unlock(rq, &flags);
if (likely(!running))
break;
/*
* The switch count is incremented before the actual
* context switch. We thus wait for two switches to be
* sure at least one completed.
*/
if ((p->nvcsw - nvcsw) > 1)
break;
if ((p->nivcsw - nivcsw) > 1)
break;
cpu_relax();
}
}
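Editor's note: a hedged sketch of the intended usage pattern: tear down per-task state, then wait until the task has completed at least one context switch so nothing stale can still be live on a cpu. Everything except wait_task_context_switch() is hypothetical:

	#include <linux/sched.h>

	extern void clear_per_task_state(struct task_struct *p);	/* hypothetical */

	static void retire_task_state(struct task_struct *p)
	{
		WARN_ON(p == current);		/* precondition noted above */
		clear_per_task_state(p);
		wait_task_context_switch(p);	/* stale state now off-cpu */
	}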
/*
* wait_task_inactive - wait for a thread to unschedule.
*
......
......@@ -15,12 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
#
# Make the trace clocks available generally: it's infrastructure
# relied on by ptrace for example:
#
obj-y += trace_clock.o
obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
......
......@@ -538,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
......
/*
* h/w branch tracer for x86 based on bts
* h/w branch tracer for x86 based on BTS
*
* Copyright (C) 2008-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
*/
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
......@@ -15,110 +14,119 @@
#include <asm/ds.h>
#include "trace.h"
#include "trace_output.h"
#include "trace.h"
#define SIZEOF_BTS (1 << 13)
#define BTS_BUFFER_SIZE (1 << 13)
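Editor's note, for scale: (1 << 13) = 8192 bytes per cpu; assuming the 24-byte three-field record layout of 64-bit BTS (an assumption here, not stated in this patch), that is roughly 340 branch records per cpu buffer.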
/*
* The tracer lock protects the below per-cpu tracer array.
* It needs to be held to:
* - start tracing on all cpus
* - stop tracing on all cpus
* - start tracing on a single hotplug cpu
* - stop tracing on a single hotplug cpu
* - read the trace from all cpus
* - read the trace from a single cpu
*/
static DEFINE_SPINLOCK(bts_tracer_lock);
static DEFINE_PER_CPU(struct bts_tracer *, tracer);
static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
#define this_tracer per_cpu(tracer, smp_processor_id())
#define this_buffer per_cpu(buffer, smp_processor_id())
static int __read_mostly trace_hw_branches_enabled;
static int trace_hw_branches_enabled __read_mostly;
static int trace_hw_branches_suspended __read_mostly;
static struct trace_array *hw_branch_trace __read_mostly;
/*
* Start tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_start_cpu(void *arg)
static void bts_trace_init_cpu(int cpu)
{
if (this_tracer)
ds_release_bts(this_tracer);
this_tracer =
ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
/* ovfl = */ NULL, /* th = */ (size_t)-1,
BTS_KERNEL);
if (IS_ERR(this_tracer)) {
this_tracer = NULL;
return;
}
per_cpu(tracer, cpu) =
ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
if (IS_ERR(per_cpu(tracer, cpu)))
per_cpu(tracer, cpu) = NULL;
}
static void bts_trace_start(struct trace_array *tr)
static int bts_trace_init(struct trace_array *tr)
{
spin_lock(&bts_tracer_lock);
int cpu;
hw_branch_trace = tr;
trace_hw_branches_enabled = 0;
get_online_cpus();
for_each_online_cpu(cpu) {
bts_trace_init_cpu(cpu);
on_each_cpu(bts_trace_start_cpu, NULL, 1);
if (likely(per_cpu(tracer, cpu)))
trace_hw_branches_enabled = 1;
}
trace_hw_branches_suspended = 0;
put_online_cpus();
spin_unlock(&bts_tracer_lock);
/* If we could not enable tracing on even a single cpu, we fail. */
return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
}
/*
* Stop tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_stop_cpu(void *arg)
static void bts_trace_reset(struct trace_array *tr)
{
if (this_tracer) {
ds_release_bts(this_tracer);
this_tracer = NULL;
int cpu;
get_online_cpus();
for_each_online_cpu(cpu) {
if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
}
trace_hw_branches_enabled = 0;
trace_hw_branches_suspended = 0;
put_online_cpus();
}
static void bts_trace_stop(struct trace_array *tr)
static void bts_trace_start(struct trace_array *tr)
{
spin_lock(&bts_tracer_lock);
int cpu;
trace_hw_branches_enabled = 0;
on_each_cpu(bts_trace_stop_cpu, NULL, 1);
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 0;
put_online_cpus();
}
spin_unlock(&bts_tracer_lock);
static void bts_trace_stop(struct trace_array *tr)
{
int cpu;
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 1;
put_online_cpus();
}
static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
spin_lock(&bts_tracer_lock);
if (!trace_hw_branches_enabled)
goto out;
int cpu = (long)hcpu;
switch (action) {
case CPU_ONLINE:
case CPU_DOWN_FAILED:
smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
/* The notification is sent with interrupts enabled. */
if (trace_hw_branches_enabled) {
bts_trace_init_cpu(cpu);
if (trace_hw_branches_suspended &&
likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
}
break;
case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
break;
/* The notification is sent with interrupts enabled. */
if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
}
out:
spin_unlock(&bts_tracer_lock);
return NOTIFY_DONE;
}
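Editor's note: the notifier above only fires once registered. A minimal registration sketch, assuming it belongs in the tracer's initcall (the actual init_bts_trace() body is collapsed in this view):

	#include <linux/cpu.h>

	static __init int bts_trace_register_hotcpu(void)
	{
		register_hotcpu_notifier(&bts_hotcpu_notifier);
		return 0;
	}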
......@@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
.notifier_call = bts_hotcpu_handler
};
static int bts_trace_init(struct trace_array *tr)
{
hw_branch_trace = tr;
bts_trace_start(tr);
return 0;
}
static void bts_trace_reset(struct trace_array *tr)
{
bts_trace_stop(tr);
}
static void bts_trace_print_header(struct seq_file *m)
{
seq_puts(m, "# CPU# TO <- FROM\n");
......@@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m)
static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
{
unsigned long symflags = TRACE_ITER_SYM_OFFSET;
struct trace_entry *entry = iter->ent;
struct trace_seq *seq = &iter->seq;
struct hw_branch_entry *it;
unsigned long symflags = TRACE_ITER_SYM_OFFSET;
trace_assign_type(it, entry);
......@@ -226,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
/*
* Collect the trace on the current cpu and write it into the ftrace buffer.
*
* pre: bts_tracer_lock must be locked
* pre: tracing must be suspended on the current cpu
*/
static void trace_bts_cpu(void *arg)
{
struct trace_array *tr = (struct trace_array *) arg;
struct trace_array *tr = (struct trace_array *)arg;
const struct bts_trace *trace;
unsigned char *at;
......@@ -243,10 +237,9 @@ static void trace_bts_cpu(void *arg)
if (unlikely(!this_tracer))
return;
ds_suspend_bts(this_tracer);
trace = ds_read_bts(this_tracer);
if (!trace)
goto out;
return;
for (at = trace->ds.top; (void *)at < trace->ds.end;
at += trace->ds.size)
......@@ -255,18 +248,27 @@ static void trace_bts_cpu(void *arg)
for (at = trace->ds.begin; (void *)at < trace->ds.top;
at += trace->ds.size)
trace_bts_at(trace, at);
out:
ds_resume_bts(this_tracer);
}
static void trace_bts_prepare(struct trace_iterator *iter)
{
spin_lock(&bts_tracer_lock);
int cpu;
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
/*
* We need to collect the trace on the respective cpu since ftrace
* implicitly adds the record for the current cpu.
* Once that is more flexible, we could collect the data from any cpu.
*/
on_each_cpu(trace_bts_cpu, iter->tr, 1);
spin_unlock(&bts_tracer_lock);
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
put_online_cpus();
}
static void trace_bts_close(struct trace_iterator *iter)
......@@ -276,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter)
void trace_hw_branch_oops(void)
{
spin_lock(&bts_tracer_lock);
if (this_tracer) {
ds_suspend_bts_noirq(this_tracer);
trace_bts_cpu(hw_branch_trace);
spin_unlock(&bts_tracer_lock);
ds_resume_bts_noirq(this_tracer);
}
}
struct tracer bts_tracer __read_mostly =
......@@ -293,7 +295,10 @@ struct tracer bts_tracer __read_mostly =
.start = bts_trace_start,
.stop = bts_trace_stop,
.open = trace_bts_prepare,
.close = trace_bts_close
.close = trace_bts_close,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_hw_branches,
#endif /* CONFIG_FTRACE_SELFTEST */
};
__init static int init_bts_trace(void)
......
......@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
case TRACE_HW_BRANCHES:
return 1;
}
return 0;
......@@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
#else
# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
* Simple verification test of ftrace function tracer.
* Enable ftrace, sleep 1/10 second, and then read the trace
......@@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
#ifdef CONFIG_HW_BRANCH_TRACER
int
trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr)
{
struct trace_iterator *iter;
struct tracer tracer;
unsigned long count;
int ret;
if (!trace->open) {
printk(KERN_CONT "missing open function...");
return -1;
}
ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
}
/*
* The hw-branch tracer needs to collect its trace from the various
* per-cpu trace buffers before tracing is stopped.
*/
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
memcpy(&tracer, trace, sizeof(tracer));
iter->trace = &tracer;
iter->tr = tr;
iter->pos = -1;
mutex_init(&iter->mutex);
trace->open(iter);
mutex_destroy(&iter->mutex);
kfree(iter);
tracing_stop();
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
tracing_start();
if (!ret && !count) {
printk(KERN_CONT "no entries found..");
ret = -1;
}
return ret;
}
#endif /* CONFIG_HW_BRANCH_TRACER */
......@@ -629,52 +629,43 @@ void user_shm_unlock(size_t size, struct user_struct *user)
free_uid(user);
}
void *alloc_locked_buffer(size_t size)
int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
size_t size)
{
unsigned long rlim, vm, pgsz;
void *buffer = NULL;
unsigned long lim, vm, pgsz;
int error = -ENOMEM;
pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
vm = current->mm->total_vm + pgsz;
if (rlim < vm)
goto out;
down_write(&mm->mmap_sem);
rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
vm = current->mm->locked_vm + pgsz;
if (rlim < vm)
lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
vm = mm->total_vm + pgsz;
if (lim < vm)
goto out;
buffer = kzalloc(size, GFP_KERNEL);
if (!buffer)
lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
vm = mm->locked_vm + pgsz;
if (lim < vm)
goto out;
current->mm->total_vm += pgsz;
current->mm->locked_vm += pgsz;
mm->total_vm += pgsz;
mm->locked_vm += pgsz;
error = 0;
out:
up_write(&current->mm->mmap_sem);
return buffer;
up_write(&mm->mmap_sem);
return error;
}
void release_locked_buffer(void *buffer, size_t size)
void refund_locked_memory(struct mm_struct *mm, size_t size)
{
unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
current->mm->total_vm -= pgsz;
current->mm->locked_vm -= pgsz;
up_write(&current->mm->mmap_sem);
}
down_write(&mm->mmap_sem);
void free_locked_buffer(void *buffer, size_t size)
{
release_locked_buffer(buffer, size);
mm->total_vm -= pgsz;
mm->locked_vm -= pgsz;
kfree(buffer);
up_write(&mm->mmap_sem);
}
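Editor's note, a worked example of the accounting arithmetic above: with 4 KiB pages (PAGE_SHIFT = 12), a request of size = 13000 bytes gives PAGE_ALIGN(13000) = 16384 and pgsz = 16384 >> 12 = 4, so four pages are charged against both the RLIMIT_AS and RLIMIT_MEMLOCK checks, and the same four pages are refunded on release.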